From 718073efd2a6147c4dd553071dc5cbc2a2debc09 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Thu, 26 Jun 2025 14:07:07 -0400 Subject: [PATCH 01/19] non-serde conversion --- src/bson.rs | 24 ++++--- src/raw.rs | 2 + src/raw/bson.rs | 8 +-- src/raw/bson_ref.rs | 17 ++--- src/raw/cstr.rs | 132 ++++++++++++++++++++++++++++++++++++ src/raw/document.rs | 6 +- src/raw/iter.rs | 19 +++--- src/raw/test.rs | 10 +-- src/raw/test/append.rs | 4 +- src/raw/test/props.rs | 2 +- src/tests/modules/bson.rs | 13 ++-- src/tests/modules/macros.rs | 3 +- 12 files changed, 192 insertions(+), 48 deletions(-) create mode 100644 src/raw/cstr.rs diff --git a/src/bson.rs b/src/bson.rs index ab778734..d33d7c87 100644 --- a/src/bson.rs +++ b/src/bson.rs @@ -31,7 +31,7 @@ use std::{ use serde_json::{json, Value}; pub use crate::document::Document; -use crate::{base64, oid, spec::ElementType, Binary, Decimal128}; +use crate::{base64, oid, raw::CString, spec::ElementType, Binary, Decimal128}; /// Possible BSON value types. #[derive(Clone, Default, PartialEq)] @@ -480,14 +480,14 @@ impl Bson { Bson::Boolean(v) => json!(v), Bson::Null => Value::Null, Bson::RegularExpression(Regex { pattern, options }) => { - let mut chars: Vec<_> = options.chars().collect(); + let mut chars: Vec<_> = options.as_str().chars().collect(); chars.sort_unstable(); let options: String = chars.into_iter().collect(); json!({ "$regularExpression": { - "pattern": pattern, + "pattern": pattern.into_string(), "options": options, } }) @@ -1147,7 +1147,7 @@ impl Timestamp { #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct Regex { /// The regex pattern to match. - pub pattern: String, + pub pattern: CString, /// The options for the regex. /// @@ -1156,18 +1156,22 @@ pub struct Regex { /// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent, /// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match /// unicode. - pub options: String, + pub options: CString, } impl Regex { - pub(crate) fn new(pattern: impl AsRef, options: impl AsRef) -> Self { + #[cfg(test)] + pub(crate) fn new( + pattern: impl AsRef, + options: impl AsRef, + ) -> crate::error::Result { let mut chars: Vec<_> = options.as_ref().chars().collect(); chars.sort_unstable(); let options: String = chars.into_iter().collect(); - Self { - pattern: pattern.as_ref().to_string(), - options, - } + Ok(Self { + pattern: pattern.as_ref().to_string().try_into()?, + options: options.try_into()?, + }) } } diff --git a/src/raw.rs b/src/raw.rs index 3444a61d..6041af6c 100644 --- a/src/raw.rs +++ b/src/raw.rs @@ -116,6 +116,7 @@ mod array; mod array_buf; mod bson; mod bson_ref; +mod cstr; mod document; mod document_buf; mod iter; @@ -142,6 +143,7 @@ pub use self::{ RawJavaScriptCodeWithScopeRef, RawRegexRef, }, + cstr::{assert_valid_cstr, cstr, validate_cstr, CStr, CString, IsValidCStr}, document::RawDocument, document_buf::{BindRawBsonRef, BindValue, RawDocumentBuf}, iter::{RawElement, RawIter}, diff --git a/src/raw/bson.rs b/src/raw/bson.rs index fb2c7252..e07d8459 100644 --- a/src/raw/bson.rs +++ b/src/raw/bson.rs @@ -204,8 +204,8 @@ impl RawBson { pub fn as_regex(&self) -> Option> { match self { RawBson::RegularExpression(v) => Some(RawRegexRef { - pattern: v.pattern.as_str(), - options: v.options.as_str(), + pattern: v.pattern.as_ref(), + options: v.options.as_ref(), }), _ => None, } @@ -289,8 +289,8 @@ impl RawBson { RawBson::Boolean(b) => RawBsonRef::Boolean(*b), RawBson::Null => RawBsonRef::Null, RawBson::RegularExpression(re) => RawBsonRef::RegularExpression(RawRegexRef { - options: re.options.as_str(), - pattern: re.pattern.as_str(), + options: re.options.as_ref(), + pattern: re.pattern.as_ref(), }), RawBson::JavaScriptCode(c) => RawBsonRef::JavaScriptCode(c.as_str()), RawBson::JavaScriptCodeWithScope(code_w_scope) => { diff --git a/src/raw/bson_ref.rs b/src/raw/bson_ref.rs index a2847734..5009e05c 100644 --- a/src/raw/bson_ref.rs +++ b/src/raw/bson_ref.rs @@ -3,7 +3,7 @@ use std::convert::{TryFrom, TryInto}; use super::{bson::RawBson, Error, RawArray, RawDocument, Result}; use crate::{ oid::{self, ObjectId}, - raw::{write_cstring, write_string, RawJavaScriptCodeWithScope}, + raw::{write_string, CStr, RawJavaScriptCodeWithScope}, spec::{BinarySubtype, ElementType}, Binary, Bson, @@ -255,9 +255,10 @@ impl<'a> RawBsonRef<'a> { RawBsonRef::Document(d) => RawBson::Document(d.to_owned()), RawBsonRef::Boolean(b) => RawBson::Boolean(b), RawBsonRef::Null => RawBson::Null, - RawBsonRef::RegularExpression(re) => { - RawBson::RegularExpression(Regex::new(re.pattern, re.options)) - } + RawBsonRef::RegularExpression(re) => RawBson::RegularExpression(Regex { + pattern: re.pattern.into(), + options: re.options.into(), + }), RawBsonRef::JavaScriptCode(c) => RawBson::JavaScriptCode(c.to_owned()), RawBsonRef::JavaScriptCodeWithScope(c_w_s) => { RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope { @@ -306,8 +307,8 @@ impl<'a> RawBsonRef<'a> { Self::Document(raw_document) => dest.extend(raw_document.as_bytes()), Self::Boolean(b) => dest.push(b as u8), Self::RegularExpression(re) => { - write_cstring(dest, re.pattern)?; - write_cstring(dest, re.options)?; + re.pattern.append_to(dest); + re.options.append_to(dest); } Self::JavaScriptCode(js) => write_string(dest, js), Self::JavaScriptCodeWithScope(code_w_scope) => { @@ -592,7 +593,7 @@ impl<'a> From<&'a Binary> for RawBsonRef<'a> { #[derive(Clone, Copy, Debug, PartialEq)] pub struct RawRegexRef<'a> { /// The regex pattern to match. - pub pattern: &'a str, + pub pattern: &'a CStr, /// The options for the regex. /// @@ -601,7 +602,7 @@ pub struct RawRegexRef<'a> { /// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent, /// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match /// unicode. - pub options: &'a str, + pub options: &'a CStr, } #[cfg(feature = "serde")] diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs new file mode 100644 index 00000000..c2c28aed --- /dev/null +++ b/src/raw/cstr.rs @@ -0,0 +1,132 @@ +use core::str; + +use crate::error::{Error, Result}; + +// A BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the null byte. +#[derive(Debug)] +#[repr(transparent)] +pub struct CStr { + data: [u8], +} + +impl<'a> TryFrom<&'a str> for &'a CStr { + type Error = Error; + + fn try_from(value: &str) -> Result<&CStr> { + match validate_cstr(value) { + Some(cs) => Ok(cs), + None => Err(Error::malformed_bytes(format!( + "cstring with interior null: {:?}", + value, + ))), + } + } +} + +impl CStr { + const fn from_str_unchecked(value: &str) -> &Self { + // Safety: the conversion is safe because CStr is repr(transparent), and the deref is safe + // because the pointer came from a safe reference. + unsafe { &*(value.as_bytes() as *const [u8] as *const CStr) } + } + + pub fn as_str(&self) -> &str { + // Safety: the only way to constrct a CStr is from a valid &str. + unsafe { str::from_utf8_unchecked(&self.data) } + } + + pub fn len(&self) -> usize { + self.as_str().len() + } + + pub(crate) fn append_to(&self, buf: &mut Vec) { + buf.extend(&self.data); + buf.push(0); + } +} + +impl<'a, 'b> PartialEq<&'b CStr> for &'a CStr { + fn eq(&self, other: &&CStr) -> bool { + self.as_str() == other.as_str() + } +} + +#[diagnostic::on_unimplemented(message = "the string literal contains a zero byte")] +pub trait ValidCStr {} +pub struct IsValidCStr; +impl ValidCStr for IsValidCStr {} + +#[derive(Clone, Eq, PartialEq, Hash)] +#[repr(transparent)] +pub struct CString { + data: String, +} + +pub const fn validate_cstr(text: &str) -> Option<&CStr> { + let bytes = text.as_bytes(); + let mut i = 0; + while i < bytes.len() { + if bytes[i] == 0 { + return None; + } + i += 1; + } + Some(CStr::from_str_unchecked(text)) +} +pub const fn assert_valid_cstr() {} + +#[macro_export] +macro_rules! cstr { + ($text:expr) => {{ + const VALIDATED: Option<&$crate::raw::CStr> = $crate::raw::validate_cstr($text); + const VALID: bool = VALIDATED.is_some(); + $crate::raw::assert_valid_cstr::<$crate::raw::IsValidCStr>(); + VALIDATED.unwrap() + }}; +} +pub use cstr; + +impl TryFrom for CString { + type Error = Error; + + fn try_from(data: String) -> Result { + let _: &CStr = data.as_str().try_into()?; + Ok(Self { data }) + } +} + +impl CString { + pub fn into_string(self) -> String { + self.data + } + + pub fn as_str(&self) -> &str { + self.as_ref().as_str() + } +} + +impl From<&CStr> for CString { + fn from(value: &CStr) -> Self { + Self { + data: value.as_str().into(), + } + } +} + +impl AsRef for CString { + fn as_ref(&self) -> &CStr { + CStr::from_str_unchecked(self.data.as_str()) + } +} + +impl std::fmt::Debug for CString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.data.fmt(f) + } +} + +impl std::fmt::Display for CString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.data.fmt(f) + } +} diff --git a/src/raw/document.rs b/src/raw/document.rs index 4746d7c5..fbad3a4d 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -5,6 +5,7 @@ use std::{ use crate::{ error::{Error, Result}, + raw::CStr, Bson, DateTime, JavaScriptCodeWithScope, @@ -505,9 +506,10 @@ impl RawDocument { } } - pub(crate) fn read_cstring_at(&self, start_at: usize) -> RawResult<&str> { + pub(crate) fn read_cstring_at(&self, start_at: usize) -> RawResult<&CStr> { let bytes = self.cstring_bytes_at(start_at)?; - try_to_str(bytes) + let s = try_to_str(bytes)?; + s.try_into() } /// Copy this into a [`Document`], returning an error if invalid BSON is encountered. diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 6355917e..79146ff2 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -2,7 +2,7 @@ use std::convert::TryInto; use crate::{ oid::ObjectId, - raw::{Error, Result, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE}, + raw::{CStr, Error, Result, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE}, spec::{BinarySubtype, ElementType}, Bson, DateTime, @@ -50,7 +50,7 @@ impl<'a> Iterator for Iter<'a> { match self.inner.next() { Some(Ok(elem)) => match elem.value() { Err(e) => Some(Err(e)), - Ok(value) => Some(Ok((elem.key, value))), + Ok(value) => Some(Ok((elem.key.as_str(), value))), }, Some(Err(e)) => Some(Err(e)), None => None, @@ -111,7 +111,7 @@ impl<'a> RawIter<'a> { #[derive(Clone)] pub struct RawElement<'a> { - key: &'a str, + key: &'a CStr, kind: ElementType, doc: &'a RawDocument, start_at: usize, @@ -160,7 +160,7 @@ impl<'a> RawElement<'a> { } pub fn key(&self) -> &'a str { - self.key + self.key.as_str() } pub fn element_type(&self) -> ElementType { @@ -305,11 +305,12 @@ impl<'a> RawElement<'a> { String::from_utf8_lossy(self.doc.cstring_bytes_at(self.start_at)?).into_owned(); let pattern_len = pattern.len(); Utf8LossyBson::RegularExpression(crate::Regex { - pattern, + pattern: pattern.try_into()?, options: String::from_utf8_lossy( self.doc.cstring_bytes_at(self.start_at + pattern_len + 1)?, ) - .into_owned(), + .into_owned() + .try_into()?, }) } _ => return Ok(None), @@ -317,7 +318,7 @@ impl<'a> RawElement<'a> { } fn malformed_error(&self, e: impl ToString) -> Error { - Error::malformed_bytes(e).with_key(self.key) + Error::malformed_bytes(e).with_key(self.key.as_str()) } pub(crate) fn slice(&self) -> &'a [u8] { @@ -344,7 +345,7 @@ impl<'a> RawElement<'a> { Ok(ObjectId::from_bytes( self.doc.as_bytes()[start_at..(start_at + 12)] .try_into() - .map_err(|e| Error::malformed_bytes(e).with_key(self.key))?, + .map_err(|e| Error::malformed_bytes(e).with_key(self.key.as_str()))?, )) } } @@ -443,7 +444,7 @@ impl<'a> Iterator for RawIter<'a> { }), Err(error) => { self.valid = false; - Err(error.with_key(key)) + Err(error.with_key(key.as_str())) } }) } diff --git a/src/raw/test.rs b/src/raw/test.rs index b651004d..ef28f456 100644 --- a/src/raw/test.rs +++ b/src/raw/test.rs @@ -104,7 +104,7 @@ fn rawdoc_to_doc() { "boolean": true, "datetime": DateTime::now(), "null": RawBson::Null, - "regex": Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}, + "regex": Regex { pattern: cstr!(r"end\s*$").into(), options: cstr!("i").into()}, "javascript": RawBson::JavaScriptCode(String::from("console.log(console);")), "symbol": RawBson::Symbol(String::from("artist-formerly-known-as")), "javascript_with_scope": RawJavaScriptCodeWithScope { @@ -267,7 +267,7 @@ fn null() { #[test] fn regex() { let rawdoc = rawdoc! { - "regex": Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}, + "regex": Regex { pattern: cstr!(r"end\s*$").into(), options: cstr!("i").into()}, }; let regex = rawdoc .get("regex") @@ -275,8 +275,8 @@ fn regex() { .expect("no key regex") .as_regex() .expect("was not regex"); - assert_eq!(regex.pattern, r"end\s*$"); - assert_eq!(regex.options, "i"); + assert_eq!(regex.pattern, cstr!(r"end\s*$")); + assert_eq!(regex.options, cstr!("i")); } #[test] fn javascript() { @@ -388,7 +388,7 @@ fn document_iteration() { "boolean": true, "datetime": DateTime::now(), "null": RawBson::Null, - "regex": Regex { pattern: String::from(r"end\s*$"), options: String::from("i") }, + "regex": Regex { pattern: cstr!(r"end\s*$").into(), options: cstr!("i").into() }, "javascript": RawBson::JavaScriptCode(String::from("console.log(console);")), "symbol": RawBson::Symbol(String::from("artist-formerly-known-as")), "javascript_with_scope": RawJavaScriptCodeWithScope { diff --git a/src/raw/test/append.rs b/src/raw/test/append.rs index fdaeef7f..4ba24892 100644 --- a/src/raw/test/append.rs +++ b/src/raw/test/append.rs @@ -254,11 +254,11 @@ fn undefined() { #[test] fn regex() { let expected = doc! { - "regex": Regex::new("some pattern", "abc"), + "regex": Regex::new("some pattern", "abc").unwrap(), }; append_test(expected, |doc| { - doc.append("regex", Regex::new("some pattern", "abc")) + doc.append("regex", Regex::new("some pattern", "abc").unwrap()) }); } diff --git a/src/raw/test/props.rs b/src/raw/test/props.rs index 6f0157d2..535c1a46 100644 --- a/src/raw/test/props.rs +++ b/src/raw/test/props.rs @@ -22,7 +22,7 @@ pub(crate) fn arbitrary_bson() -> impl Strategy { any::().prop_map(Bson::Int32), any::().prop_map(Bson::Int64), any::<(String, String)>().prop_map(|(pattern, options)| { - Bson::RegularExpression(Regex::new(pattern, options)) + Bson::RegularExpression(Regex::new(pattern, options).unwrap()) }), any::<[u8; 12]>().prop_map(|bytes| Bson::ObjectId(crate::oid::ObjectId::from_bytes(bytes))), (arbitrary_binary_subtype(), any::>()).prop_map(|(subtype, bytes)| { diff --git a/src/tests/modules/bson.rs b/src/tests/modules/bson.rs index 2f80e7c5..7dd366a6 100644 --- a/src/tests/modules/bson.rs +++ b/src/tests/modules/bson.rs @@ -5,6 +5,7 @@ use std::{ use crate::{ base64, + cstr, doc, oid::ObjectId, spec::BinarySubtype, @@ -76,8 +77,8 @@ fn test_display_timestamp_type() { #[test] fn test_display_regex_type() { let x = Regex { - pattern: String::from("pattern"), - options: String::from("options"), + pattern: cstr!("pattern").into(), + options: cstr!("options").into(), }; let output = "/pattern/options"; assert_eq!(format!("{}", x), output); @@ -130,12 +131,12 @@ fn from_impls() { assert_eq!(Bson::from(false), Bson::Boolean(false)); assert_eq!( Bson::from(Regex { - pattern: String::from("\\s+$"), - options: String::from("i") + pattern: cstr!("\\s+$").into(), + options: cstr!("i").into(), }), Bson::RegularExpression(Regex { - pattern: String::from("\\s+$"), - options: String::from("i") + pattern: cstr!("\\s+$").into(), + options: cstr!("i").into(), }) ); assert_eq!( diff --git a/src/tests/modules/macros.rs b/src/tests/modules/macros.rs index 642dfe58..a8c5ddc6 100644 --- a/src/tests/modules/macros.rs +++ b/src/tests/modules/macros.rs @@ -1,5 +1,6 @@ use crate::{ base64, + cstr, doc, oid::ObjectId, spec::BinarySubtype, @@ -34,7 +35,7 @@ fn standard_format() { }, "bool": true, "null": null, - "regexp": Bson::RegularExpression(Regex { pattern: "s[ao]d".to_owned(), options: "i".to_owned() }), + "regexp": Bson::RegularExpression(Regex { pattern: cstr!("s[ao]d").into(), options: cstr!("i").into() }), "with_wrapped_parens": (-20), "code": Bson::JavaScriptCode("function(x) { return x._id; }".to_owned()), "i32": 12, From d32ccc18d09b27ac34a7c8af3a7a7e54f8b953ec Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 13:46:47 -0400 Subject: [PATCH 02/19] serde conversion --- src/bson.rs | 13 ++++-- src/de/raw.rs | 8 ++-- src/de/serde.rs | 4 +- src/extjson/de.rs | 2 +- src/extjson/models.rs | 2 +- src/raw/bson_ref.rs | 17 ++++--- src/raw/cstr.rs | 49 +++++++++++++++++--- src/raw/iter.rs | 4 +- src/raw/serde/bson_visitor.rs | 8 ++-- src/ser/raw/value_serializer.rs | 13 ++++-- src/ser/serde.rs | 4 +- src/tests/modules/macros.rs | 2 +- src/tests/modules/ser.rs | 7 --- src/tests/modules/serializer_deserializer.rs | 5 +- src/tests/serde.rs | 9 ++-- 15 files changed, 99 insertions(+), 48 deletions(-) diff --git a/src/bson.rs b/src/bson.rs index d33d7c87..d9a58c71 100644 --- a/src/bson.rs +++ b/src/bson.rs @@ -268,6 +268,12 @@ impl From for Bson { } } +impl From for Bson { + fn from(a: crate::raw::CString) -> Bson { + Bson::String(a.into_string()) + } +} + impl From for Bson { fn from(a: Document) -> Bson { Bson::Document(a) @@ -619,7 +625,7 @@ impl Bson { ref pattern, ref options, }) => { - let mut chars: Vec<_> = options.chars().collect(); + let mut chars: Vec<_> = options.as_str().chars().collect(); chars.sort_unstable(); let options: String = chars.into_iter().collect(); @@ -842,7 +848,9 @@ impl Bson { if let Ok(regex) = doc.get_document("$regularExpression") { if let Ok(pattern) = regex.get_str("pattern") { if let Ok(options) = regex.get_str("options") { - return Bson::RegularExpression(Regex::new(pattern, options)); + if let Ok(regex) = Regex::new(pattern, options) { + return Bson::RegularExpression(regex); + } } } } @@ -1160,7 +1168,6 @@ pub struct Regex { } impl Regex { - #[cfg(test)] pub(crate) fn new( pattern: impl AsRef, options: impl AsRef, diff --git a/src/de/raw.rs b/src/de/raw.rs index 1739cbc1..d594ae72 100644 --- a/src/de/raw.rs +++ b/src/de/raw.rs @@ -1306,15 +1306,15 @@ impl<'de> serde::de::Deserializer<'de> for &mut RegexAccess<'de> { RegexDeserializationStage::Pattern => { self.stage = RegexDeserializationStage::Options; match &self.re { - BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern), - BsonCow::Owned(re) => visitor.visit_str(&re.pattern), + BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern.as_str()), + BsonCow::Owned(re) => visitor.visit_str(&re.pattern.as_str()), } } RegexDeserializationStage::Options => { self.stage = RegexDeserializationStage::Done; match &self.re { - BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options), - BsonCow::Owned(re) => visitor.visit_str(&re.options), + BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options.as_str()), + BsonCow::Owned(re) => visitor.visit_str(&re.options.as_str()), } } RegexDeserializationStage::Done => { diff --git a/src/de/serde.rs b/src/de/serde.rs index 88f3769c..bd80f711 100644 --- a/src/de/serde.rs +++ b/src/de/serde.rs @@ -442,7 +442,9 @@ impl<'de> Visitor<'de> for BsonVisitor { "$regularExpression" => { let re = visitor.next_value::()?; - return Ok(Bson::RegularExpression(Regex::new(re.pattern, re.options))); + return Ok(Bson::RegularExpression( + Regex::new(re.pattern, re.options).map_err(serde::de::Error::custom)?, + )); } "$dbPointer" => { diff --git a/src/extjson/de.rs b/src/extjson/de.rs index df5bbd4a..f5f1af3b 100644 --- a/src/extjson/de.rs +++ b/src/extjson/de.rs @@ -55,7 +55,7 @@ impl TryFrom> for Bson { if obj.contains_key("$regularExpression") { let regex: models::Regex = serde_json::from_value(obj.into())?; - return Ok(regex.parse().into()); + return Ok(regex.parse()?.into()); } if obj.contains_key("$numberInt") { diff --git a/src/extjson/models.rs b/src/extjson/models.rs index 31c137c7..a2840df7 100644 --- a/src/extjson/models.rs +++ b/src/extjson/models.rs @@ -122,7 +122,7 @@ pub(crate) struct RegexBody { } impl Regex { - pub(crate) fn parse(self) -> crate::Regex { + pub(crate) fn parse(self) -> crate::error::Result { crate::Regex::new(self.body.pattern, self.body.options) } } diff --git a/src/raw/bson_ref.rs b/src/raw/bson_ref.rs index 5009e05c..df4ad893 100644 --- a/src/raw/bson_ref.rs +++ b/src/raw/bson_ref.rs @@ -255,10 +255,15 @@ impl<'a> RawBsonRef<'a> { RawBsonRef::Document(d) => RawBson::Document(d.to_owned()), RawBsonRef::Boolean(b) => RawBson::Boolean(b), RawBsonRef::Null => RawBson::Null, - RawBsonRef::RegularExpression(re) => RawBson::RegularExpression(Regex { - pattern: re.pattern.into(), - options: re.options.into(), - }), + RawBsonRef::RegularExpression(re) => { + let mut chars: Vec<_> = re.options.as_str().chars().collect(); + chars.sort_unstable(); + let options: String = chars.into_iter().collect(); + RawBson::RegularExpression(Regex { + pattern: re.pattern.into(), + options: super::CString::from_unchecked(options), + }) + } RawBsonRef::JavaScriptCode(c) => RawBson::JavaScriptCode(c.to_owned()), RawBsonRef::JavaScriptCodeWithScope(c_w_s) => { RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope { @@ -629,8 +634,8 @@ impl serde::Serialize for RawRegexRef<'_> { { #[derive(serde::Serialize)] struct BorrowedRegexBody<'a> { - pattern: &'a str, - options: &'a str, + pattern: &'a CStr, + options: &'a CStr, } let mut state = serializer.serialize_struct("$regularExpression", 1)?; diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index c2c28aed..7c6230e5 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -51,17 +51,29 @@ impl<'a, 'b> PartialEq<&'b CStr> for &'a CStr { } } +impl std::borrow::ToOwned for CStr { + type Owned = CString; + + fn to_owned(&self) -> Self::Owned { + self.into() + } +} + +#[cfg(feature = "serde")] +impl serde::Serialize for &CStr { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + self.as_str().serialize(serializer) + } +} + #[diagnostic::on_unimplemented(message = "the string literal contains a zero byte")] pub trait ValidCStr {} pub struct IsValidCStr; impl ValidCStr for IsValidCStr {} -#[derive(Clone, Eq, PartialEq, Hash)] -#[repr(transparent)] -pub struct CString { - data: String, -} - pub const fn validate_cstr(text: &str) -> Option<&CStr> { let bytes = text.as_bytes(); let mut i = 0; @@ -86,6 +98,12 @@ macro_rules! cstr { } pub use cstr; +#[derive(Clone, Eq, PartialEq, Hash)] +#[repr(transparent)] +pub struct CString { + data: String, +} + impl TryFrom for CString { type Error = Error; @@ -95,7 +113,20 @@ impl TryFrom for CString { } } +impl TryFrom<&str> for CString { + type Error = Error; + + fn try_from(data: &str) -> Result { + let cs: &CStr = data.try_into()?; + Ok(cs.into()) + } +} + impl CString { + pub(crate) fn from_unchecked(data: String) -> Self { + Self { data } + } + pub fn into_string(self) -> String { self.data } @@ -130,3 +161,9 @@ impl std::fmt::Display for CString { self.data.fmt(f) } } + +impl std::borrow::Borrow for CString { + fn borrow(&self) -> &CStr { + self.as_ref() + } +} diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 79146ff2..7516c3c0 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -145,9 +145,11 @@ impl TryInto for RawElement<'_> { impl<'a> RawElement<'a> { #[cfg(feature = "serde")] pub(crate) fn toplevel(bytes: &'a [u8]) -> Result { + use crate::raw::cstr; + let doc = RawDocument::decode_from_bytes(bytes)?; Ok(Self { - key: "TOPLEVEL", + key: cstr!("TOPLEVEL"), kind: ElementType::EmbeddedDocument, doc, start_at: 0, diff --git a/src/raw/serde/bson_visitor.rs b/src/raw/serde/bson_visitor.rs index 8739321e..436b873e 100644 --- a/src/raw/serde/bson_visitor.rs +++ b/src/raw/serde/bson_visitor.rs @@ -84,14 +84,14 @@ impl OwnedOrBorrowedRawBsonVisitor { match (body.pattern, body.options) { (Cow::Borrowed(p), Cow::Borrowed(o)) => { RawBsonRef::RegularExpression(RawRegexRef { - pattern: p, - options: o, + pattern: p.try_into().map_err(A::Error::custom)?, + options: o.try_into().map_err(A::Error::custom)?, }) .into() } (p, o) => RawBson::RegularExpression(Regex { - pattern: p.into_owned(), - options: o.into_owned(), + pattern: p.into_owned().try_into().map_err(A::Error::custom)?, + options: o.into_owned().try_into().map_err(A::Error::custom)?, }) .into(), } diff --git a/src/ser/raw/value_serializer.rs b/src/ser/raw/value_serializer.rs index c57aa44f..8789735d 100644 --- a/src/ser/raw/value_serializer.rs +++ b/src/ser/raw/value_serializer.rs @@ -51,7 +51,7 @@ enum SerializationStep { RegEx, RegExPattern, RegExOptions { - pattern: String, + pattern: crate::raw::CString, }, Timestamp, @@ -289,16 +289,19 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { } SerializationStep::RegExPattern => { self.state = SerializationStep::RegExOptions { - pattern: v.to_string(), + pattern: v.to_string().try_into()?, }; } SerializationStep::RegExOptions { pattern } => { let mut chars: Vec<_> = v.chars().collect(); chars.sort_unstable(); - let options = &chars.into_iter().collect::(); - RawBsonRef::RegularExpression(crate::RawRegexRef { pattern, options }) - .append_to(&mut self.root_serializer.bytes)?; + let options = chars.into_iter().collect::(); + RawBsonRef::RegularExpression(crate::RawRegexRef { + pattern: pattern.as_ref(), + options: options.as_str().try_into()?, + }) + .append_to(&mut self.root_serializer.bytes)?; } SerializationStep::Code => { RawBsonRef::JavaScriptCode(v).append_to(&mut self.root_serializer.bytes)?; diff --git a/src/ser/serde.rs b/src/ser/serde.rs index b7cb203b..ee47fb4f 100644 --- a/src/ser/serde.rs +++ b/src/ser/serde.rs @@ -625,8 +625,8 @@ impl Serialize for Regex { S: ser::Serializer, { let raw = RawRegexRef { - pattern: self.pattern.as_str(), - options: self.options.as_str(), + pattern: self.pattern.as_ref(), + options: self.options.as_ref(), }; raw.serialize(serializer) } diff --git a/src/tests/modules/macros.rs b/src/tests/modules/macros.rs index a8c5ddc6..0cc9900c 100644 --- a/src/tests/modules/macros.rs +++ b/src/tests/modules/macros.rs @@ -78,7 +78,7 @@ fn standard_format() { }, "bool": true, "null": null, - "regexp": Regex { pattern: "s[ao]d".to_owned(), options: "i".to_owned() }, + "regexp": Regex { pattern: cstr!("s[ao]d").into(), options: cstr!("i").into() }, "with_wrapped_parens": (-20), "code": RawBson::JavaScriptCode("function(x) { return x._id; }".to_owned()), "i32": 12, diff --git a/src/tests/modules/ser.rs b/src/tests/modules/ser.rs index bca4defe..468fcd12 100644 --- a/src/tests/modules/ser.rs +++ b/src/tests/modules/ser.rs @@ -11,7 +11,6 @@ use crate::{ tests::LOCK, Bson, Document, - Regex, }; #[test] @@ -162,12 +161,6 @@ fn cstring_null_bytes_error() { let doc = doc! { "a": { "\0": "b" } }; verify_doc(doc); - let regex = doc! { "regex": Regex { pattern: "\0".into(), options: "a".into() } }; - verify_doc(regex); - - let regex = doc! { "regex": Regex { pattern: "a".into(), options: "\0".into() } }; - verify_doc(regex); - fn verify_doc(doc: Document) { let result = doc.encode_to_vec(); assert!(result.is_err(), "unexpected success"); diff --git a/src/tests/modules/serializer_deserializer.rs b/src/tests/modules/serializer_deserializer.rs index 67bc6d09..01a36f76 100644 --- a/src/tests/modules/serializer_deserializer.rs +++ b/src/tests/modules/serializer_deserializer.rs @@ -6,6 +6,7 @@ use std::{ use serde::{Deserialize, Serialize}; use crate::{ + cstr, de::deserialize_from_document, doc, oid::ObjectId, @@ -158,8 +159,8 @@ fn test_serialize_deserialize_null() { fn test_serialize_deserialize_regexp() { let _guard = LOCK.run_concurrently(); let src = Bson::RegularExpression(Regex { - pattern: "1".to_owned(), - options: "2".to_owned(), + pattern: cstr!("1").to_owned(), + options: cstr!("2").to_owned(), }); let dst = vec![14, 0, 0, 0, 11, 107, 101, 121, 0, 49, 0, 50, 0, 0]; diff --git a/src/tests/serde.rs b/src/tests/serde.rs index 091df19c..b25456de 100644 --- a/src/tests/serde.rs +++ b/src/tests/serde.rs @@ -2,6 +2,7 @@ use crate::{ bson, + cstr, deserialize_from_bson, deserialize_from_document, doc, @@ -150,8 +151,8 @@ fn test_ser_regex() { } let regex = Regex { - pattern: "12".into(), - options: "01".into(), + pattern: cstr!("12").into(), + options: cstr!("01").into(), }; let foo = Foo { @@ -179,8 +180,8 @@ fn test_de_regex() { } let regex = Regex { - pattern: "12".into(), - options: "01".into(), + pattern: cstr!("12").into(), + options: cstr!("01").into(), }; let foo: Foo = deserialize_from_bson(Bson::Document(doc! { From 1c33eb57852fcdc76b6a018bd1f056b3242f4931 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 13:57:46 -0400 Subject: [PATCH 03/19] propagate infallibility --- src/raw/bson_ref.rs | 3 +-- src/raw/document_buf.rs | 6 ++--- src/raw/document_buf/raw_writer.rs | 10 +++----- src/raw/serde/seeded_visitor.rs | 3 +-- src/ser/raw.rs | 40 ++++++++++++++++-------------- src/ser/raw/document_serializer.rs | 8 +++--- src/ser/raw/value_serializer.rs | 37 ++++++++++++++------------- 7 files changed, 53 insertions(+), 54 deletions(-) diff --git a/src/raw/bson_ref.rs b/src/raw/bson_ref.rs index df4ad893..d09c4824 100644 --- a/src/raw/bson_ref.rs +++ b/src/raw/bson_ref.rs @@ -293,7 +293,7 @@ impl<'a> RawBsonRef<'a> { } #[inline] - pub(crate) fn append_to(self, dest: &mut Vec) -> Result<()> { + pub(crate) fn append_to(self, dest: &mut Vec) { match self { Self::Int32(val) => dest.extend(val.to_le_bytes()), Self::Int64(val) => dest.extend(val.to_le_bytes()), @@ -333,7 +333,6 @@ impl<'a> RawBsonRef<'a> { } Self::Null | Self::Undefined | Self::MinKey | Self::MaxKey => {} } - Ok(()) } } diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 8c8ee3f4..309a3c19 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -220,9 +220,9 @@ impl RawDocumentBuf { key: impl AsRef, value: impl BindRawBsonRef, ) -> crate::error::Result<()> { - value.bind(|value_ref| { - raw_writer::RawWriter::new(&mut self.data).append(key.as_ref(), value_ref) - }) + let key = key.as_ref().try_into()?; + Ok(value + .bind(|value_ref| raw_writer::RawWriter::new(&mut self.data).append(key, value_ref))) } } diff --git a/src/raw/document_buf/raw_writer.rs b/src/raw/document_buf/raw_writer.rs index dc550dbc..d988a823 100644 --- a/src/raw/document_buf/raw_writer.rs +++ b/src/raw/document_buf/raw_writer.rs @@ -1,4 +1,4 @@ -use crate::{raw::write_cstring, RawBsonRef}; +use crate::{raw::CStr, RawBsonRef}; pub(super) struct RawWriter<'a> { data: &'a mut Vec, @@ -9,19 +9,17 @@ impl<'a> RawWriter<'a> { Self { data } } - pub(super) fn append(&mut self, key: &str, value: RawBsonRef) -> crate::error::Result<()> { + pub(super) fn append(&mut self, key: &CStr, value: RawBsonRef) { let original_len = self.data.len(); self.data[original_len - 1] = value.element_type() as u8; - write_cstring(self.data, key)?; - value.append_to(self.data)?; + key.append_to(self.data); + value.append_to(self.data); // append trailing null byte self.data.push(0); // update length let new_len = (self.data.len() as i32).to_le_bytes(); self.data[0..4].copy_from_slice(&new_len); - - Ok(()) } } diff --git a/src/raw/serde/seeded_visitor.rs b/src/raw/serde/seeded_visitor.rs index 8361421c..cbd6c1ef 100644 --- a/src/raw/serde/seeded_visitor.rs +++ b/src/raw/serde/seeded_visitor.rs @@ -273,8 +273,7 @@ impl<'de> Visitor<'de> for SeededVisitor<'_, 'de> { // Cases that don't _ => { let bson = bson.as_ref(); - bson.append_to(self.buffer.get_owned_buffer()) - .map_err(A::Error::custom)?; + bson.append_to(self.buffer.get_owned_buffer()); Ok(bson.element_type()) } } diff --git a/src/ser/raw.rs b/src/ser/raw.rs index c376c479..2dd4918c 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -9,7 +9,7 @@ use serde::{ use self::value_serializer::{ValueSerializer, ValueType}; use crate::{ - raw::{write_cstring, RAW_ARRAY_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, + raw::{write_cstring, CStr, RAW_ARRAY_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, ser::{Error, Result}, serde_helpers::HUMAN_READABLE_NEWTYPE, spec::{BinarySubtype, ElementType}, @@ -108,7 +108,7 @@ impl Serializer { fn serialize_raw(&mut self, v: RawBsonRef) -> Result<()> { self.update_element_type(v.element_type())?; - v.append_to(&mut self.bytes)?; + v.append_to(&mut self.bytes); Ok(()) } } @@ -290,7 +290,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { T: serde::Serialize + ?Sized, { self.update_element_type(ElementType::EmbeddedDocument)?; - let mut d = DocumentSerializer::start(&mut *self)?; + let mut d = DocumentSerializer::start(&mut *self); d.serialize_entry(variant, value)?; d.end_doc()?; Ok(()) @@ -299,7 +299,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_seq(self, _len: Option) -> Result { self.update_element_type(ElementType::Array)?; - DocumentSerializer::start(&mut *self) + Ok(DocumentSerializer::start(&mut *self)) } #[inline] @@ -325,13 +325,17 @@ impl<'a> serde::Serializer for &'a mut Serializer { _len: usize, ) -> Result { self.update_element_type(ElementType::EmbeddedDocument)?; - VariantSerializer::start(&mut *self, variant, VariantInnerType::Tuple) + Ok(VariantSerializer::start( + &mut *self, + variant.try_into()?, + VariantInnerType::Tuple, + )) } #[inline] fn serialize_map(self, _len: Option) -> Result { self.update_element_type(ElementType::EmbeddedDocument)?; - DocumentSerializer::start(&mut *self) + Ok(DocumentSerializer::start(&mut *self)) } #[inline] @@ -360,7 +364,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { )?; match value_type { Some(vt) => Ok(StructSerializer::Value(ValueSerializer::new(self, vt))), - None => Ok(StructSerializer::Document(DocumentSerializer::start(self)?)), + None => Ok(StructSerializer::Document(DocumentSerializer::start(self))), } } @@ -373,7 +377,11 @@ impl<'a> serde::Serializer for &'a mut Serializer { _len: usize, ) -> Result { self.update_element_type(ElementType::EmbeddedDocument)?; - VariantSerializer::start(&mut *self, variant, VariantInnerType::Struct) + Ok(VariantSerializer::start( + &mut *self, + variant.try_into()?, + VariantInnerType::Struct, + )) } } @@ -431,32 +439,28 @@ pub(crate) struct VariantSerializer<'a> { } impl<'a> VariantSerializer<'a> { - fn start( - rs: &'a mut Serializer, - variant: &'static str, - inner_type: VariantInnerType, - ) -> Result { + fn start(rs: &'a mut Serializer, variant: &'static CStr, inner_type: VariantInnerType) -> Self { let doc_start = rs.bytes.len(); // write placeholder length for document, will be updated at end static ZERO: RawBsonRef = RawBsonRef::Int32(0); - ZERO.append_to(&mut rs.bytes)?; + ZERO.append_to(&mut rs.bytes); let inner = match inner_type { VariantInnerType::Struct => ElementType::EmbeddedDocument, VariantInnerType::Tuple => ElementType::Array, }; rs.bytes.push(inner as u8); - write_cstring(&mut rs.bytes, variant)?; + variant.append_to(&mut rs.bytes); let inner_start = rs.bytes.len(); // write placeholder length for inner, will be updated at end - ZERO.append_to(&mut rs.bytes)?; + ZERO.append_to(&mut rs.bytes); - Ok(Self { + Self { root_serializer: rs, num_elements_serialized: 0, doc_start, inner_start, - }) + } } #[inline] diff --git a/src/ser/raw/document_serializer.rs b/src/ser/raw/document_serializer.rs index 21587da6..c4cf03c3 100644 --- a/src/ser/raw/document_serializer.rs +++ b/src/ser/raw/document_serializer.rs @@ -20,14 +20,14 @@ pub(crate) struct DocumentSerializer<'a> { } impl<'a> DocumentSerializer<'a> { - pub(crate) fn start(rs: &'a mut Serializer) -> crate::ser::Result { + pub(crate) fn start(rs: &'a mut Serializer) -> Self { let start = rs.bytes.len(); - RawBsonRef::Int32(0).append_to(&mut rs.bytes)?; - Ok(Self { + RawBsonRef::Int32(0).append_to(&mut rs.bytes); + Self { root_serializer: rs, num_keys_serialized: 0, start, - }) + } } /// Serialize a document key using the provided closure. diff --git a/src/ser/raw/value_serializer.rs b/src/ser/raw/value_serializer.rs index 8789735d..3b661b00 100644 --- a/src/ser/raw/value_serializer.rs +++ b/src/ser/raw/value_serializer.rs @@ -199,7 +199,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { let increment = u32::try_from(v).map_err(Error::custom)?; RawBsonRef::Timestamp(crate::Timestamp { time, increment }) - .append_to(&mut self.root_serializer.bytes)?; + .append_to(&mut self.root_serializer.bytes); Ok(()) } @@ -215,7 +215,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { subtype: v.into(), bytes, }; - RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes); self.state = SerializationStep::Done; Ok(()) } @@ -258,11 +258,11 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { SerializationStep::DateTimeNumberLong => { let millis = v.parse().map_err(Error::custom)?; RawBsonRef::DateTime(crate::DateTime::from_millis(millis)) - .append_to(&mut self.root_serializer.bytes)?; + .append_to(&mut self.root_serializer.bytes); } SerializationStep::Oid => { let oid = ObjectId::parse_str(v).map_err(Error::custom)?; - RawBsonRef::ObjectId(oid).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::ObjectId(oid).append_to(&mut self.root_serializer.bytes); } SerializationStep::BinaryBytes => { self.state = SerializationStep::BinarySubType { @@ -274,10 +274,10 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { let subtype: BinarySubtype = subtype_byte[0].into(); let bytes = &base64::decode(base64.as_str()).map_err(Error::custom)?; let binary = RawBinaryRef { subtype, bytes }; - RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes); } SerializationStep::Symbol => { - RawBsonRef::Symbol(v).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::Symbol(v).append_to(&mut self.root_serializer.bytes); } SerializationStep::DbPointerRef => { self.state = SerializationStep::DbPointerId { ns: v.to_owned() }; @@ -285,7 +285,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { SerializationStep::DbPointerId { ns } => { let id = ObjectId::parse_str(v).map_err(Error::custom)?; RawBsonRef::DbPointer(crate::RawDbPointerRef { namespace: ns, id }) - .append_to(&mut self.root_serializer.bytes)?; + .append_to(&mut self.root_serializer.bytes); } SerializationStep::RegExPattern => { self.state = SerializationStep::RegExOptions { @@ -301,10 +301,10 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { pattern: pattern.as_ref(), options: options.as_str().try_into()?, }) - .append_to(&mut self.root_serializer.bytes)?; + .append_to(&mut self.root_serializer.bytes); } SerializationStep::Code => { - RawBsonRef::JavaScriptCode(v).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::JavaScriptCode(v).append_to(&mut self.root_serializer.bytes); } SerializationStep::CodeWithScopeCode => { self.state = SerializationStep::CodeWithScopeScope { @@ -327,7 +327,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { match self.state { SerializationStep::Decimal128Value => { let dec = crate::Decimal128::from_bytes(v.try_into().map_err(Error::custom)?); - RawBsonRef::Decimal128(dec).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::Decimal128(dec).append_to(&mut self.root_serializer.bytes); Ok(()) } SerializationStep::BinaryBytes => { @@ -339,8 +339,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { code, scope: RawDocument::decode_from_bytes(v).map_err(Error::custom)?, }; - RawBsonRef::JavaScriptCodeWithScope(raw) - .append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::JavaScriptCodeWithScope(raw).append_to(&mut self.root_serializer.bytes); self.state = SerializationStep::Done; Ok(()) } @@ -448,9 +447,9 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { #[inline] fn serialize_map(self, _len: Option) -> Result { match self.state { - SerializationStep::CodeWithScopeScope { ref code, raw } if !raw => { - CodeWithScopeSerializer::start(code.as_str(), self.root_serializer) - } + SerializationStep::CodeWithScopeScope { ref code, raw } if !raw => Ok( + CodeWithScopeSerializer::start(code.as_str(), self.root_serializer), + ), _ => Err(self.invalid_step("map")), } } @@ -613,13 +612,13 @@ pub(crate) struct CodeWithScopeSerializer<'a> { impl<'a> CodeWithScopeSerializer<'a> { #[inline] - fn start(code: &str, rs: &'a mut Serializer) -> Result { + fn start(code: &str, rs: &'a mut Serializer) -> Self { let start = rs.bytes.len(); - RawBsonRef::Int32(0).append_to(&mut rs.bytes)?; // placeholder length + RawBsonRef::Int32(0).append_to(&mut rs.bytes); // placeholder length write_string(&mut rs.bytes, code); - let doc = DocumentSerializer::start(rs)?; - Ok(Self { start, doc }) + let doc = DocumentSerializer::start(rs); + Self { start, doc } } } From 1aa5d08c4fb46417019881ec346473e217a070f8 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 18:47:06 -0400 Subject: [PATCH 04/19] cstr append key --- src/macros.rs | 16 ++-- src/raw/array_buf.rs | 33 ++++--- src/raw/bson_ref.rs | 2 +- src/raw/cstr.rs | 8 +- src/raw/document_buf.rs | 46 +++++---- src/raw/test/append.rs | 195 ++++++++++++++++++--------------------- src/tests/spec/corpus.rs | 5 +- 7 files changed, 146 insertions(+), 159 deletions(-) diff --git a/src/macros.rs b/src/macros.rs index 62d640b7..edafa969 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -240,12 +240,12 @@ macro_rules! rawbson { // Finished with trailing comma. (@array [$($elems:expr,)*]) => { - $crate::RawArrayBuf::from_iter(vec![$($elems,)*]).expect("invalid bson value") + $crate::RawArrayBuf::from_iter(vec![$($elems,)*]) }; // Finished without trailing comma. (@array [$($elems:expr),*]) => { - $crate::RawArrayBuf::from_iter(vec![$($elems),*]).expect("invalid bson value") + $crate::RawArrayBuf::from_iter(vec![$($elems),*]) }; // Next element is `null`. @@ -292,14 +292,18 @@ macro_rules! rawbson { (@object $object:ident () () ()) => {}; // Insert the current entry followed by trailing comma. - (@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => { - $object.append(($($key)+), $value).expect("invalid bson value"); + (@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {{ + let key: &str = ($($key)+).as_ref(); + let key: &$crate::raw::CStr = key.try_into().expect("invalid key"); + $object.append(key, $value); $crate::rawbson!(@object $object () ($($rest)*) ($($rest)*)); - }; + }}; // Insert the last entry without trailing comma. (@object $object:ident [$($key:tt)+] ($value:expr)) => { - $object.append(($($key)+), $value).expect("invalid bson value"); + let key: &str = ($($key)+).as_ref(); + let key: &$crate::raw::CStr = key.try_into().expect("invalid key"); + $object.append(key, $value); }; // Next value is `null`. diff --git a/src/raw/array_buf.rs b/src/raw/array_buf.rs index d7e2068a..8a6de4b1 100644 --- a/src/raw/array_buf.rs +++ b/src/raw/array_buf.rs @@ -53,19 +53,6 @@ impl RawArrayBuf { } } - #[allow(clippy::should_implement_trait)] - pub fn from_iter(iter: I) -> crate::error::Result - where - B: BindRawBsonRef, - I: IntoIterator, - { - let mut array_buf = RawArrayBuf::new(); - for item in iter { - array_buf.push(item)?; - } - Ok(array_buf) - } - /// Construct a new [`RawArrayBuf`] from the provided [`Vec`] of bytes. /// /// This involves a traversal of the array to count the values. @@ -102,10 +89,22 @@ impl RawArrayBuf { /// assert!(iter.next().is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn push(&mut self, value: impl BindRawBsonRef) -> crate::error::Result<()> { - self.inner.append(self.len.to_string(), value)?; + pub fn push(&mut self, value: impl BindRawBsonRef) { + self.inner.append( + super::CString::from_string_unchecked(self.len.to_string()), + value, + ); self.len += 1; - Ok(()) + } +} + +impl FromIterator for RawArrayBuf { + fn from_iter>(iter: T) -> Self { + let mut array_buf = RawArrayBuf::new(); + for item in iter { + array_buf.push(item); + } + array_buf } } @@ -200,7 +199,7 @@ impl TryFrom for RawArrayBuf { let mut tmp = RawArrayBuf::new(); for val in value { let raw: super::RawBson = val.try_into()?; - tmp.push(raw)?; + tmp.push(raw); } Ok(tmp) } diff --git a/src/raw/bson_ref.rs b/src/raw/bson_ref.rs index d09c4824..3b50f145 100644 --- a/src/raw/bson_ref.rs +++ b/src/raw/bson_ref.rs @@ -261,7 +261,7 @@ impl<'a> RawBsonRef<'a> { let options: String = chars.into_iter().collect(); RawBson::RegularExpression(Regex { pattern: re.pattern.into(), - options: super::CString::from_unchecked(options), + options: super::CString::from_string_unchecked(options), }) } RawBsonRef::JavaScriptCode(c) => RawBson::JavaScriptCode(c.to_owned()), diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index 7c6230e5..dd8ed854 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -59,6 +59,12 @@ impl std::borrow::ToOwned for CStr { } } +impl AsRef for CStr { + fn as_ref(&self) -> &CStr { + self + } +} + #[cfg(feature = "serde")] impl serde::Serialize for &CStr { fn serialize(&self, serializer: S) -> std::result::Result @@ -123,7 +129,7 @@ impl TryFrom<&str> for CString { } impl CString { - pub(crate) fn from_unchecked(data: String) -> Self { + pub(crate) fn from_string_unchecked(data: String) -> Self { Self { data } } diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 309a3c19..42017fc5 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -4,7 +4,10 @@ use std::{ ops::Deref, }; -use crate::{raw::MIN_BSON_DOCUMENT_SIZE, Document}; +use crate::{ + raw::{CStr, MIN_BSON_DOCUMENT_SIZE}, + Document, +}; use super::{bson::RawBson, iter::Iter, RawBsonRef, RawDocument, RawIter, Result}; @@ -87,20 +90,6 @@ impl RawDocumentBuf { Self::decode_from_bytes(buf) } - #[allow(clippy::should_implement_trait)] - pub fn from_iter(iter: I) -> Result - where - S: AsRef, - B: BindRawBsonRef, - I: IntoIterator, - { - let mut buf = RawDocumentBuf::new(); - for (k, v) in iter { - buf.append(k, v)?; - } - Ok(buf) - } - /// Create a [`RawDocumentBuf`] from a [`Document`]. /// /// ``` @@ -117,8 +106,9 @@ impl RawDocumentBuf { pub fn from_document(doc: impl Borrow) -> Result { let mut out = RawDocumentBuf::new(); for (k, v) in doc.borrow() { + let k: &CStr = k.as_str().try_into()?; let val: RawBson = v.clone().try_into()?; - out.append(k, val)?; + out.append(k, val); } Ok(out) } @@ -215,14 +205,19 @@ impl RawDocumentBuf { /// assert_eq!(doc.to_document()?, expected); /// # Ok::<(), Error>(()) /// ``` - pub fn append( - &mut self, - key: impl AsRef, - value: impl BindRawBsonRef, - ) -> crate::error::Result<()> { - let key = key.as_ref().try_into()?; - Ok(value - .bind(|value_ref| raw_writer::RawWriter::new(&mut self.data).append(key, value_ref))) + pub fn append(&mut self, key: impl AsRef, value: impl BindRawBsonRef) { + let key = key.as_ref(); + value.bind(|value_ref| raw_writer::RawWriter::new(&mut self.data).append(key, value_ref)); + } +} + +impl, B: BindRawBsonRef> FromIterator<(K, B)> for RawDocumentBuf { + fn from_iter>(iter: T) -> Self { + let mut buf = RawDocumentBuf::new(); + for (k, v) in iter { + buf.append(k, v); + } + buf } } @@ -287,8 +282,9 @@ impl TryFrom for RawDocumentBuf { fn try_from(doc: Document) -> std::result::Result { let mut out = RawDocumentBuf::new(); for (k, v) in doc { + let k: &CStr = k.as_str().try_into()?; let val: RawBson = v.try_into()?; - out.append(k, val)?; + out.append(k, val); } Ok(out) } diff --git a/src/raw/test/append.rs b/src/raw/test/append.rs index 4ba24892..8b9dcfea 100644 --- a/src/raw/test/append.rs +++ b/src/raw/test/append.rs @@ -1,6 +1,6 @@ use crate::{ oid::ObjectId, - raw::RawJavaScriptCodeWithScope, + raw::{cstr, RawJavaScriptCodeWithScope}, spec::BinarySubtype, tests::LOCK, Binary, @@ -19,13 +19,10 @@ use crate::{ use pretty_assertions::assert_eq; -fn append_test( - expected: Document, - append: impl FnOnce(&mut RawDocumentBuf) -> crate::error::Result<()>, -) { +fn append_test(expected: Document, append: impl FnOnce(&mut RawDocumentBuf)) { let bytes = expected.encode_to_vec().unwrap(); let mut buf = RawDocumentBuf::new(); - assert!(append(&mut buf).is_ok()); + append(&mut buf); assert_eq!(buf.as_bytes(), bytes); } @@ -37,10 +34,9 @@ fn i32() { "c": 0_i32 }; append_test(expected, |doc| { - doc.append("a", -1_i32)?; - doc.append("b", 123_i32)?; - doc.append("c", 0_i32)?; - Ok(()) + doc.append(cstr!("a"), -1_i32); + doc.append(cstr!("b"), 123_i32); + doc.append(cstr!("c"), 0_i32); }); } @@ -52,10 +48,9 @@ fn i64() { "c": 0_i64 }; append_test(expected, |doc| { - doc.append("a", -1_i64)?; - doc.append("b", 123_i64)?; - doc.append("c", 0_i64)?; - Ok(()) + doc.append(cstr!("a"), -1_i64); + doc.append(cstr!("b"), 123_i64); + doc.append(cstr!("c"), 0_i64); }); } @@ -68,11 +63,10 @@ fn str() { "last": "the lazy sheep dog", }; append_test(expected, |doc| { - doc.append("first", "the quick")?; - doc.append("second", "brown fox")?; - doc.append("third", "jumped over")?; - doc.append("last", "the lazy sheep dog")?; - Ok(()) + doc.append(cstr!("first"), "the quick"); + doc.append(cstr!("second"), "brown fox"); + doc.append(cstr!("third"), "jumped over"); + doc.append(cstr!("last"), "the lazy sheep dog"); }); } @@ -86,12 +80,11 @@ fn double() { "inf": f64::INFINITY, }; append_test(expected, |doc| { - doc.append("positive", 12.5)?; - doc.append("0", 0.0)?; - doc.append("negative", -123.24)?; - doc.append("nan", f64::NAN)?; - doc.append("inf", f64::INFINITY)?; - Ok(()) + doc.append(cstr!("positive"), 12.5); + doc.append(cstr!("0"), 0.0); + doc.append(cstr!("negative"), -123.24); + doc.append(cstr!("nan"), f64::NAN); + doc.append(cstr!("inf"), f64::INFINITY); }); } @@ -102,9 +95,8 @@ fn boolean() { "false": false, }; append_test(expected, |doc| { - doc.append("true", true)?; - doc.append("false", false)?; - Ok(()) + doc.append(cstr!("true"), true); + doc.append(cstr!("false"), false); }); } @@ -113,7 +105,7 @@ fn null() { let expected = doc! { "null": null, }; - append_test(expected, |doc| doc.append("null", RawBson::Null)); + append_test(expected, |doc| doc.append(cstr!("null"), RawBson::Null)); } #[test] @@ -126,12 +118,11 @@ fn document() { } }; append_test(expected, |doc| { - doc.append("empty", RawDocumentBuf::new())?; + doc.append(cstr!("empty"), RawDocumentBuf::new()); let mut buf = RawDocumentBuf::new(); - buf.append("a", 1_i32)?; - buf.append("b", true)?; - doc.append("subdoc", buf)?; - Ok(()) + buf.append(cstr!("a"), 1_i32); + buf.append(cstr!("b"), true); + doc.append(cstr!("subdoc"), buf); }); } @@ -147,16 +138,15 @@ fn array() { ] }; append_test(expected, |doc| { - doc.append("empty", RawArrayBuf::new())?; + doc.append(cstr!("empty"), RawArrayBuf::new()); let mut buf = RawArrayBuf::new(); - buf.push(true)?; - buf.push("string")?; + buf.push(true); + buf.push("string"); let mut subdoc = RawDocumentBuf::new(); - subdoc.append("a", "subdoc")?; - buf.push(subdoc)?; - buf.push(123_i32)?; - doc.append("array", buf)?; - Ok(()) + subdoc.append(cstr!("a"), "subdoc"); + buf.push(subdoc); + buf.push(123_i32); + doc.append(cstr!("array"), buf); }); } @@ -168,7 +158,7 @@ fn oid() { let expected = doc! { "oid": oid, }; - append_test(expected, |doc| doc.append("oid", oid)); + append_test(expected, |doc| doc.append(cstr!("oid"), oid)); } #[test] @@ -182,9 +172,8 @@ fn datetime() { }; append_test(expected, |doc| { - doc.append("now", dt)?; - doc.append("old", old)?; - Ok(()) + doc.append(cstr!("now"), dt); + doc.append(cstr!("old"), old); }); } @@ -199,7 +188,7 @@ fn timestamp() { "ts": ts, }; - append_test(expected, |doc| doc.append("ts", ts)); + append_test(expected, |doc| doc.append(cstr!("ts"), ts)); } #[test] @@ -222,9 +211,8 @@ fn binary() { }; append_test(expected, |doc| { - doc.append("generic", bin)?; - doc.append("binary_old", old)?; - Ok(()) + doc.append(cstr!("generic"), bin); + doc.append(cstr!("binary_old"), old); }); } @@ -236,9 +224,8 @@ fn min_max_key() { }; append_test(expected, |doc| { - doc.append("min", RawBson::MinKey)?; - doc.append("max", RawBson::MaxKey)?; - Ok(()) + doc.append(cstr!("min"), RawBson::MinKey); + doc.append(cstr!("max"), RawBson::MaxKey); }); } @@ -248,7 +235,9 @@ fn undefined() { "undefined": Bson::Undefined, }; - append_test(expected, |doc| doc.append("undefined", RawBson::Undefined)); + append_test(expected, |doc| { + doc.append(cstr!("undefined"), RawBson::Undefined) + }); } #[test] @@ -258,7 +247,7 @@ fn regex() { }; append_test(expected, |doc| { - doc.append("regex", Regex::new("some pattern", "abc").unwrap()) + doc.append(cstr!("regex"), Regex::new("some pattern", "abc").unwrap()) }); } @@ -275,19 +264,21 @@ fn code() { }; append_test(expected, |doc| { - doc.append("code", RawBson::JavaScriptCode("some code".to_string()))?; + doc.append( + cstr!("code"), + RawBson::JavaScriptCode("some code".to_string()), + ); let mut scope = RawDocumentBuf::new(); - scope.append("a", 1_i32)?; - scope.append("b", true)?; + scope.append(cstr!("a"), 1_i32); + scope.append(cstr!("b"), true); doc.append( - "code_w_scope", + cstr!("code_w_scope"), RawJavaScriptCodeWithScope { code: "some code".to_string(), scope, }, - )?; - Ok(()) + ); }); } @@ -298,7 +289,7 @@ fn symbol() { }; append_test(expected, |doc| { - doc.append("symbol", RawBson::Symbol("symbol".to_string())) + doc.append(cstr!("symbol"), RawBson::Symbol("symbol".to_string())) }); } @@ -317,7 +308,7 @@ fn dbpointer() { append_test(expected, |doc| { doc.append( - "symbol", + cstr!("symbol"), RawBson::DbPointer(DbPointer { namespace: "ns".to_string(), id, @@ -333,7 +324,7 @@ fn decimal128() { "decimal": decimal }; - append_test(expected, |doc| doc.append("decimal", decimal)); + append_test(expected, |doc| doc.append(cstr!("decimal"), decimal)); } #[test] @@ -352,34 +343,33 @@ fn general() { }; append_test(expected, |doc| { - doc.append("a", true)?; - doc.append("second key", 123.4)?; - doc.append("third", 15_i64)?; - doc.append("32", -100101_i32)?; + doc.append(cstr!("a"), true); + doc.append(cstr!("second key"), 123.4); + doc.append(cstr!("third"), 15_i64); + doc.append(cstr!("32"), -100101_i32); let mut subdoc = RawDocumentBuf::new(); - subdoc.append("a", "subkey")?; + subdoc.append(cstr!("a"), "subkey"); let mut subsubdoc = RawDocumentBuf::new(); - subsubdoc.append("subdoc", dt)?; - subdoc.append("another", subsubdoc)?; - doc.append("subdoc", subdoc)?; + subsubdoc.append(cstr!("subdoc"), dt); + subdoc.append(cstr!("another"), subsubdoc); + doc.append(cstr!("subdoc"), subdoc); let mut array = RawArrayBuf::new(); - array.push(1_i64)?; - array.push(true)?; + array.push(1_i64); + array.push(true); let mut array_subdoc = RawDocumentBuf::new(); - array_subdoc.append("doc", 23_i64)?; - array.push(array_subdoc)?; + array_subdoc.append(cstr!("doc"), 23_i64); + array.push(array_subdoc); let mut sub_array = RawArrayBuf::new(); - sub_array.push("another")?; - sub_array.push("array")?; - array.push(sub_array)?; + sub_array.push("another"); + sub_array.push("array"); + array.push(sub_array); - doc.append("array", array)?; - Ok(()) + doc.append(cstr!("array"), array); }); } @@ -387,25 +377,18 @@ fn general() { fn from_iter() { let doc_buf = RawDocumentBuf::from_iter([ ( - "array", - RawBson::Array( - RawArrayBuf::from_iter([ - RawBson::Boolean(true), - RawBson::Document( - RawDocumentBuf::from_iter([ - ("ok", RawBson::Boolean(false)), - ("other", RawBson::String("hello".to_string())), - ]) - .unwrap(), - ), - ]) - .unwrap(), - ), + cstr!("array"), + RawBson::Array(RawArrayBuf::from_iter([ + RawBson::Boolean(true), + RawBson::Document(RawDocumentBuf::from_iter([ + (cstr!("ok"), RawBson::Boolean(false)), + (cstr!("other"), RawBson::String("hello".to_string())), + ])), + ])), ), - ("bool", RawBson::Boolean(true)), - ("string", RawBson::String("some string".to_string())), - ]) - .unwrap(); + (cstr!("bool"), RawBson::Boolean(true)), + (cstr!("string"), RawBson::String("some string".to_string())), + ]); let doc = doc! { "array": [ @@ -420,22 +403,22 @@ fn from_iter() { }; let expected = doc! { "expected": doc }; - append_test(expected, |doc| doc.append("expected", doc_buf)); + append_test(expected, |doc| doc.append(cstr!("expected"), doc_buf)); } #[test] fn array_buf() { let mut arr_buf = RawArrayBuf::new(); - arr_buf.push(true).unwrap(); + arr_buf.push(true); let mut doc_buf = RawDocumentBuf::new(); - doc_buf.append("x", 3_i32).unwrap(); - doc_buf.append("string", "string").unwrap(); - arr_buf.push(doc_buf).unwrap(); + doc_buf.append(cstr!("x"), 3_i32); + doc_buf.append(cstr!("string"), "string"); + arr_buf.push(doc_buf); let mut sub_arr = RawArrayBuf::new(); - sub_arr.push("a string").unwrap(); - arr_buf.push(sub_arr).unwrap(); + sub_arr.push("a string"); + arr_buf.push(sub_arr); let arr = rawbson!([ true, diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index 0255cf41..dccf3e0c 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -201,10 +201,9 @@ fn run_test(test: TestFile) { let owned_raw_bson_field = deserializer_raw .deserialize_any(FieldVisitor(test_key.as_str(), PhantomData::)) .expect(&description); + let test_key_cstr: &crate::raw::CStr = test_key.as_str().try_into().unwrap(); let from_slice_owned_vec = - RawDocumentBuf::from_iter([(test_key, owned_raw_bson_field)]) - .expect(&description) - .into_bytes(); + RawDocumentBuf::from_iter([(test_key_cstr, owned_raw_bson_field)]).into_bytes(); // deserialize the field from raw Bytes into a Bson let deserializer_value = From 33b69c5febe8e988c2aa324b0c2b8fe34006a4e5 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 18:52:15 -0400 Subject: [PATCH 05/19] fix doctests --- src/raw/array_buf.rs | 4 ++-- src/raw/document.rs | 10 +++++----- src/raw/document_buf.rs | 12 ++++++------ 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/raw/array_buf.rs b/src/raw/array_buf.rs index 8a6de4b1..019c649c 100644 --- a/src/raw/array_buf.rs +++ b/src/raw/array_buf.rs @@ -65,14 +65,14 @@ impl RawArrayBuf { /// /// ``` /// # use bson::error::Error; - /// use bson::raw::{RawArrayBuf, RawDocumentBuf}; + /// use bson::raw::{cstr, RawArrayBuf, RawDocumentBuf}; /// /// let mut array = RawArrayBuf::new(); /// array.push("a string"); /// array.push(12_i32); /// /// let mut doc = RawDocumentBuf::new(); - /// doc.append("a key", "a value"); + /// doc.append(cstr!("a key"), "a value"); /// array.push(doc.clone()); /// /// let mut iter = array.into_iter(); diff --git a/src/raw/document.rs b/src/raw/document.rs index fbad3a4d..6e6d6335 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -388,18 +388,18 @@ impl RawDocument { /// the key corresponds to a value which isn't a regex. /// /// ``` - /// use bson::{rawdoc, Regex}; + /// use bson::{rawdoc, Regex, raw::cstr}; /// /// let doc = rawdoc! { /// "regex": Regex { - /// pattern: r"end\s*$".into(), - /// options: "i".into(), + /// pattern: cstr!(r"end\s*$").into(), + /// options: cstr!("i").into(), /// }, /// "bool": true, /// }; /// - /// assert_eq!(doc.get_regex("regex")?.pattern, r"end\s*$"); - /// assert_eq!(doc.get_regex("regex")?.options, "i"); + /// assert_eq!(doc.get_regex("regex")?.pattern, cstr!(r"end\s*$")); + /// assert_eq!(doc.get_regex("regex")?.options, cstr!("i")); /// assert!(doc.get_regex("bool").is_err()); /// assert!(doc.get_regex("unknown").is_err()); /// # Ok::<(), Box>(()) diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 42017fc5..feb4f933 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -183,17 +183,17 @@ impl RawDocumentBuf { /// the documentation for [BindRawBsonRef] for more details. /// ``` /// # use bson::error::Error; - /// use bson::{doc, raw::{RawBsonRef, RawDocumentBuf}}; + /// use bson::{doc, raw::{cstr, RawBsonRef, RawDocumentBuf}}; /// /// let mut doc = RawDocumentBuf::new(); /// // `&str` and `i32` both convert to `RawBsonRef` - /// doc.append("a string", "some string"); - /// doc.append("an integer", 12_i32); + /// doc.append(cstr!("a string"), "some string"); + /// doc.append(cstr!("an integer"), 12_i32); /// /// let mut subdoc = RawDocumentBuf::new(); - /// subdoc.append("a key", true); - /// doc.append("a borrowed document", &subdoc); - /// doc.append("an owned document", subdoc); + /// subdoc.append(cstr!("a key"), true); + /// doc.append(cstr!("a borrowed document"), &subdoc); + /// doc.append(cstr!("an owned document"), subdoc); /// /// let expected = doc! { /// "a string": "some string", From c7564b9d88192314e731858b4eab593d50652ea1 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 18:55:36 -0400 Subject: [PATCH 06/19] clippy fixes --- src/de/raw.rs | 4 ++-- src/raw/cstr.rs | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/de/raw.rs b/src/de/raw.rs index d594ae72..4d3112cb 100644 --- a/src/de/raw.rs +++ b/src/de/raw.rs @@ -1307,14 +1307,14 @@ impl<'de> serde::de::Deserializer<'de> for &mut RegexAccess<'de> { self.stage = RegexDeserializationStage::Options; match &self.re { BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern.as_str()), - BsonCow::Owned(re) => visitor.visit_str(&re.pattern.as_str()), + BsonCow::Owned(re) => visitor.visit_str(re.pattern.as_str()), } } RegexDeserializationStage::Options => { self.stage = RegexDeserializationStage::Done; match &self.re { BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options.as_str()), - BsonCow::Owned(re) => visitor.visit_str(&re.options.as_str()), + BsonCow::Owned(re) => visitor.visit_str(re.options.as_str()), } } RegexDeserializationStage::Done => { diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index dd8ed854..7e44b4f2 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -39,13 +39,17 @@ impl CStr { self.as_str().len() } + pub fn is_empty(&self) -> bool { + self.as_str().is_empty() + } + pub(crate) fn append_to(&self, buf: &mut Vec) { buf.extend(&self.data); buf.push(0); } } -impl<'a, 'b> PartialEq<&'b CStr> for &'a CStr { +impl PartialEq<&CStr> for &CStr { fn eq(&self, other: &&CStr) -> bool { self.as_str() == other.as_str() } From 7944192ec01c465618c8d4368ea37d2659c93cc3 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 18:57:07 -0400 Subject: [PATCH 07/19] fix fuzzer --- fuzz/generate_corpus.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fuzz/generate_corpus.rs b/fuzz/generate_corpus.rs index 06b16f02..fe5d70b0 100644 --- a/fuzz/generate_corpus.rs +++ b/fuzz/generate_corpus.rs @@ -1,4 +1,4 @@ -use bson::{doc, Bson, Decimal128}; +use bson::{cstr, doc, Bson, Decimal128}; use std::{ fs, io::{Error, ErrorKind}, @@ -64,7 +64,7 @@ fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> { "bool": true, "date": bson::DateTime::now(), "null": Bson::Null, - "regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }), + "regex": Bson::RegularExpression(bson::Regex { pattern: cstr!("pattern").into(), options: cstr!("i").into() }), "int32": 123i32, "timestamp": bson::Timestamp { time: 12345, increment: 1 }, "int64": 123i64, From a6e60e5f2bc3bd44d222dbb41f798acc524b9362 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 19:02:12 -0400 Subject: [PATCH 08/19] AsRef for CStr --- src/raw/cstr.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index 7e44b4f2..08ca66f0 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -69,6 +69,12 @@ impl AsRef for CStr { } } +impl AsRef for CStr { + fn as_ref(&self) -> &str { + self.as_str() + } +} + #[cfg(feature = "serde")] impl serde::Serialize for &CStr { fn serialize(&self, serializer: S) -> std::result::Result From b63cd15a14eeea06a3e22eb0cb4b1dacfd52817f Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 19:09:42 -0400 Subject: [PATCH 09/19] require cstr to be a literal --- src/raw/cstr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index 08ca66f0..c4b38775 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -105,7 +105,7 @@ pub const fn assert_valid_cstr() {} #[macro_export] macro_rules! cstr { - ($text:expr) => {{ + ($text:literal) => {{ const VALIDATED: Option<&$crate::raw::CStr> = $crate::raw::validate_cstr($text); const VALID: bool = VALIDATED.is_some(); $crate::raw::assert_valid_cstr::<$crate::raw::IsValidCStr>(); From fe810e9dec93859bed963eef610bcd5be2880723 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 19:32:28 -0400 Subject: [PATCH 10/19] better rawdoc --- src/macros.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/macros.rs b/src/macros.rs index edafa969..f1a18e78 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -291,19 +291,26 @@ macro_rules! rawbson { // Finished. (@object $object:ident () () ()) => {}; - // Insert the current entry followed by trailing comma. + // Insert the current entry with followed by trailing comma, with a key literal. + (@object $object:ident [$key:literal] ($value:expr) , $($rest:tt)*) => {{ + $object.append($crate::raw::cstr!($key), $value); + $crate::rawbson!(@object $object () ($($rest)*) ($($rest)*)); + }}; + + // Insert the current entry with followed by trailing comma, with a key expression. (@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {{ - let key: &str = ($($key)+).as_ref(); - let key: &$crate::raw::CStr = key.try_into().expect("invalid key"); - $object.append(key, $value); + $object.append($($key)+, $value); $crate::rawbson!(@object $object () ($($rest)*) ($($rest)*)); }}; - // Insert the last entry without trailing comma. + // Insert the last entry without trailing comma, with a key literal. + (@object $object:ident [$key:literal] ($value:expr)) => { + $object.append($crate::raw::cstr!($key), $value); + }; + + // Insert the last entry without trailing comma, with a key expression. (@object $object:ident [$($key:tt)+] ($value:expr)) => { - let key: &str = ($($key)+).as_ref(); - let key: &$crate::raw::CStr = key.try_into().expect("invalid key"); - $object.append(key, $value); + $object.append($($key)+, $value); }; // Next value is `null`. From 2ac2cb7f01ba96f3a8bc0fe290572e41b2b1427a Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 19:38:34 -0400 Subject: [PATCH 11/19] fix serde-tests --- serde-tests/json.rs | 18 +++++++++--------- serde-tests/test.rs | 29 ++++++++++++++--------------- src/raw/cstr.rs | 10 ++++++++++ 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/serde-tests/json.rs b/serde-tests/json.rs index 7d6d3b2d..8b12f014 100644 --- a/serde-tests/json.rs +++ b/serde-tests/json.rs @@ -3,7 +3,7 @@ use serde_json::json; use super::AllTypes; -use bson::{doc, Bson, JavaScriptCodeWithScope, RawArrayBuf, RawBson, RawDocumentBuf}; +use bson::{cstr, doc, Bson, JavaScriptCodeWithScope, RawArrayBuf, RawBson, RawDocumentBuf}; use serde::{Deserialize, Serialize}; @@ -99,18 +99,18 @@ fn owned_raw_bson() { }); let mut doc_buf = RawDocumentBuf::new(); - doc_buf.append("a", "key").unwrap(); - doc_buf.append("number", 12).unwrap(); - doc_buf.append("bool", false).unwrap(); - doc_buf.append("nu", RawBson::Null).unwrap(); + doc_buf.append(cstr!("a"), "key"); + doc_buf.append(cstr!("number"), 12); + doc_buf.append(cstr!("bool"), false); + doc_buf.append(cstr!("nu"), RawBson::Null); let mut array_buf = RawArrayBuf::new(); - array_buf.push(1).unwrap(); - array_buf.push("string").unwrap(); + array_buf.push(1); + array_buf.push("string"); let mut bson_doc = RawDocumentBuf::new(); - bson_doc.append("first", true).unwrap(); - bson_doc.append("second", "string").unwrap(); + bson_doc.append(cstr!("first"), true); + bson_doc.append(cstr!("second"), "string"); let expected = Foo { doc_buf, diff --git a/serde-tests/test.rs b/serde-tests/test.rs index 9fbafdc4..00fe8f1c 100644 --- a/serde-tests/test.rs +++ b/serde-tests/test.rs @@ -18,6 +18,7 @@ use std::{ }; use bson::{ + cstr, doc, oid::ObjectId, spec::BinarySubtype, @@ -835,8 +836,8 @@ fn raw_regex() { let bytes = bson::serialize_to_vec(&doc! { "r": Regex { - pattern: "a[b-c]d".to_string(), - options: "ab".to_string(), + pattern: cstr!("a[b-c]d").into(), + options: cstr!("ab").into(), }, }) .expect("raw_regex"); @@ -927,8 +928,8 @@ impl AllTypes { }; let date = DateTime::now(); let regex = Regex { - pattern: "hello".to_string(), - options: "x".to_string(), + pattern: cstr!("hello").into(), + options: cstr!("x").into(), }; let timestamp = Timestamp { time: 123, @@ -1058,8 +1059,8 @@ fn all_raw_types_rmp() { scope: doc! { "x": 1 }, }, "regex": Regex { - pattern: "pattern".to_string(), - options: "opt".to_string() + pattern: cstr!("pattern").into(), + options: cstr!("opt").into() } }) .unwrap(); @@ -1254,24 +1255,22 @@ fn owned_raw_types() { let f = Foo { subdoc: RawDocumentBuf::from_iter([ - ("a key", RawBson::String("a value".to_string())), - ("an objectid", RawBson::ObjectId(oid)), - ("a date", RawBson::DateTime(dt)), + (cstr!("a key"), RawBson::String("a value".to_string())), + (cstr!("an objectid"), RawBson::ObjectId(oid)), + (cstr!("a date"), RawBson::DateTime(dt)), ( - "code_w_scope", + cstr!("code_w_scope"), RawBson::JavaScriptCodeWithScope(raw_code_w_scope.clone()), ), - ("decimal128", RawBson::Decimal128(d128)), - ]) - .unwrap(), + (cstr!("decimal128"), RawBson::Decimal128(d128)), + ]), array: RawArrayBuf::from_iter([ RawBson::String("a string".to_string()), RawBson::ObjectId(oid), RawBson::DateTime(dt), RawBson::JavaScriptCodeWithScope(raw_code_w_scope), RawBson::Decimal128(d128), - ]) - .unwrap(), + ]), }; let expected = doc! { diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index c4b38775..77747d1a 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -183,3 +183,13 @@ impl std::borrow::Borrow for CString { self.as_ref() } } + +#[cfg(feature = "serde")] +impl serde::Serialize for CString { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + self.data.serialize(serializer) + } +} From 6eeaec42962d46896169ebfdcdb3d5e2768fbe25 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 20:00:11 -0400 Subject: [PATCH 12/19] cleanup --- src/bson.rs | 5 +++-- src/de/serde.rs | 3 ++- src/extjson/models.rs | 2 +- src/raw.rs | 12 ------------ src/raw/cstr.rs | 6 ++++++ src/raw/serde/seeded_visitor.rs | 6 ++++-- src/raw/test/append.rs | 7 +++++-- src/raw/test/props.rs | 2 +- src/ser/raw.rs | 4 ++-- src/ser/raw/document_serializer.rs | 4 ++-- 10 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/bson.rs b/src/bson.rs index d9a58c71..9b15fc55 100644 --- a/src/bson.rs +++ b/src/bson.rs @@ -848,7 +848,7 @@ impl Bson { if let Ok(regex) = doc.get_document("$regularExpression") { if let Ok(pattern) = regex.get_str("pattern") { if let Ok(options) = regex.get_str("options") { - if let Ok(regex) = Regex::new(pattern, options) { + if let Ok(regex) = Regex::from_strings(pattern, options) { return Bson::RegularExpression(regex); } } @@ -1168,7 +1168,8 @@ pub struct Regex { } impl Regex { - pub(crate) fn new( + #[cfg(feature = "serde")] + pub(crate) fn from_strings( pattern: impl AsRef, options: impl AsRef, ) -> crate::error::Result { diff --git a/src/de/serde.rs b/src/de/serde.rs index bd80f711..47af21fc 100644 --- a/src/de/serde.rs +++ b/src/de/serde.rs @@ -443,7 +443,8 @@ impl<'de> Visitor<'de> for BsonVisitor { "$regularExpression" => { let re = visitor.next_value::()?; return Ok(Bson::RegularExpression( - Regex::new(re.pattern, re.options).map_err(serde::de::Error::custom)?, + Regex::from_strings(re.pattern, re.options) + .map_err(serde::de::Error::custom)?, )); } diff --git a/src/extjson/models.rs b/src/extjson/models.rs index a2840df7..88d3a2ca 100644 --- a/src/extjson/models.rs +++ b/src/extjson/models.rs @@ -123,7 +123,7 @@ pub(crate) struct RegexBody { impl Regex { pub(crate) fn parse(self) -> crate::error::Result { - crate::Regex::new(self.body.pattern, self.body.options) + crate::Regex::from_strings(self.body.pattern, self.body.options) } } diff --git a/src/raw.rs b/src/raw.rs index 6041af6c..769b6ce3 100644 --- a/src/raw.rs +++ b/src/raw.rs @@ -318,15 +318,3 @@ pub(crate) fn write_string(buf: &mut Vec, s: &str) { buf.extend(s.as_bytes()); buf.push(0); } - -pub(crate) fn write_cstring(buf: &mut Vec, s: &str) -> Result<()> { - if s.contains('\0') { - return Err(Error::malformed_bytes(format!( - "cstring with interior null: {:?}", - s - ))); - } - buf.extend(s.as_bytes()); - buf.push(0); - Ok(()) -} diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index 77747d1a..8c7344ff 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -24,6 +24,12 @@ impl<'a> TryFrom<&'a str> for &'a CStr { } impl CStr { + // Convenience shorthand for making the types of TryFrom line up + #[cfg(feature = "serde")] + pub(crate) fn from_str(value: &str) -> Result<&CStr> { + value.try_into() + } + const fn from_str_unchecked(value: &str) -> &Self { // Safety: the conversion is safe because CStr is repr(transparent), and the deref is safe // because the pointer came from a safe reference. diff --git a/src/raw/serde/seeded_visitor.rs b/src/raw/serde/seeded_visitor.rs index cbd6c1ef..58dec86d 100644 --- a/src/raw/serde/seeded_visitor.rs +++ b/src/raw/serde/seeded_visitor.rs @@ -6,7 +6,7 @@ use serde::{ }; use crate::{ - raw::{write_cstring, write_string, RAW_BSON_NEWTYPE}, + raw::{write_string, RAW_BSON_NEWTYPE}, spec::{BinarySubtype, ElementType}, RawBson, RawBsonRef, @@ -119,7 +119,9 @@ impl<'a, 'de> SeededVisitor<'a, 'de> { /// Appends a cstring to the buffer. Returns an error if the given string contains a null byte. fn append_cstring(&mut self, key: &str) -> Result<(), String> { - write_cstring(self.buffer.get_owned_buffer(), key).map_err(|e| e.to_string()) + Ok(crate::raw::CStr::from_str(key) + .map_err(|e| e.to_string())? + .append_to(self.buffer.get_owned_buffer())) } /// Appends a string and its length to the buffer. diff --git a/src/raw/test/append.rs b/src/raw/test/append.rs index 8b9dcfea..e530ab7f 100644 --- a/src/raw/test/append.rs +++ b/src/raw/test/append.rs @@ -243,11 +243,14 @@ fn undefined() { #[test] fn regex() { let expected = doc! { - "regex": Regex::new("some pattern", "abc").unwrap(), + "regex": Regex::from_strings("some pattern", "abc").unwrap(), }; append_test(expected, |doc| { - doc.append(cstr!("regex"), Regex::new("some pattern", "abc").unwrap()) + doc.append( + cstr!("regex"), + Regex::from_strings("some pattern", "abc").unwrap(), + ) }); } diff --git a/src/raw/test/props.rs b/src/raw/test/props.rs index 535c1a46..4b5918ff 100644 --- a/src/raw/test/props.rs +++ b/src/raw/test/props.rs @@ -22,7 +22,7 @@ pub(crate) fn arbitrary_bson() -> impl Strategy { any::().prop_map(Bson::Int32), any::().prop_map(Bson::Int64), any::<(String, String)>().prop_map(|(pattern, options)| { - Bson::RegularExpression(Regex::new(pattern, options).unwrap()) + Bson::RegularExpression(Regex::from_strings(pattern, options).unwrap()) }), any::<[u8; 12]>().prop_map(|bytes| Bson::ObjectId(crate::oid::ObjectId::from_bytes(bytes))), (arbitrary_binary_subtype(), any::>()).prop_map(|(subtype, bytes)| { diff --git a/src/ser/raw.rs b/src/ser/raw.rs index 2dd4918c..73aec059 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -9,7 +9,7 @@ use serde::{ use self::value_serializer::{ValueSerializer, ValueType}; use crate::{ - raw::{write_cstring, CStr, RAW_ARRAY_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, + raw::{CStr, RAW_ARRAY_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, ser::{Error, Result}, serde_helpers::HUMAN_READABLE_NEWTYPE, spec::{BinarySubtype, ElementType}, @@ -469,7 +469,7 @@ impl<'a> VariantSerializer<'a> { T: Serialize + ?Sized, { self.root_serializer.reserve_element_type(); - write_cstring(&mut self.root_serializer.bytes, k)?; + CStr::from_str(k)?.append_to(&mut self.root_serializer.bytes); v.serialize(&mut *self.root_serializer)?; self.num_elements_serialized += 1; diff --git a/src/ser/raw/document_serializer.rs b/src/ser/raw/document_serializer.rs index c4cf03c3..a2317eb7 100644 --- a/src/ser/raw/document_serializer.rs +++ b/src/ser/raw/document_serializer.rs @@ -2,7 +2,6 @@ use serde::{ser::Impossible, Serialize}; use crate::{ error::{Error, Result}, - raw::write_cstring, RawBsonRef, }; @@ -258,7 +257,8 @@ impl serde::Serializer for KeySerializer<'_> { #[inline] fn serialize_str(self, v: &str) -> Result { - write_cstring(&mut self.root_serializer.bytes, v) + crate::raw::CStr::from_str(v)?.append_to(&mut self.root_serializer.bytes); + Ok(()) } #[inline] From a4eb15c2f7b5d26177804bfe27ae54cfd935fa19 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 20:05:18 -0400 Subject: [PATCH 13/19] fix cfg --- src/bson.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bson.rs b/src/bson.rs index 9b15fc55..7bcac566 100644 --- a/src/bson.rs +++ b/src/bson.rs @@ -1168,7 +1168,7 @@ pub struct Regex { } impl Regex { - #[cfg(feature = "serde")] + #[cfg(any(test, feature = "serde"))] pub(crate) fn from_strings( pattern: impl AsRef, options: impl AsRef, From df0729ad13d69ff8af3b8dcb367ceda0a18e61f1 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 27 Jun 2025 20:06:00 -0400 Subject: [PATCH 14/19] fix clippy again --- src/raw/serde/seeded_visitor.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/raw/serde/seeded_visitor.rs b/src/raw/serde/seeded_visitor.rs index 58dec86d..f6b0ae95 100644 --- a/src/raw/serde/seeded_visitor.rs +++ b/src/raw/serde/seeded_visitor.rs @@ -119,9 +119,10 @@ impl<'a, 'de> SeededVisitor<'a, 'de> { /// Appends a cstring to the buffer. Returns an error if the given string contains a null byte. fn append_cstring(&mut self, key: &str) -> Result<(), String> { - Ok(crate::raw::CStr::from_str(key) + crate::raw::CStr::from_str(key) .map_err(|e| e.to_string())? - .append_to(self.buffer.get_owned_buffer())) + .append_to(self.buffer.get_owned_buffer()); + Ok(()) } /// Appends a string and its length to the buffer. From b5882bfa0ca3afffd16c77b0bed97104dca4de6a Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Wed, 2 Jul 2025 13:39:13 -0400 Subject: [PATCH 15/19] add documentation --- src/raw/cstr.rs | 32 +++++++++++++++++++++++++++++++- src/raw/document_buf.rs | 2 +- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index 8c7344ff..e3976daa 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -2,7 +2,11 @@ use core::str; use crate::error::{Error, Result}; -// A BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the null byte. +/// A borrowed BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the nul byte. +/// Most conveniently constructed via the [`cstr!`](crate::raw::cstr) macro. +/// +/// Unlike [`std::ffi::CStr`], this is required to be valid UTF-8, and does not include the nul +/// terminator in the buffer. #[derive(Debug)] #[repr(transparent)] pub struct CStr { @@ -36,15 +40,18 @@ impl CStr { unsafe { &*(value.as_bytes() as *const [u8] as *const CStr) } } + /// View the buffer as a Rust `&str`. pub fn as_str(&self) -> &str { // Safety: the only way to constrct a CStr is from a valid &str. unsafe { str::from_utf8_unchecked(&self.data) } } + /// The length in bytes of the buffer. pub fn len(&self) -> usize { self.as_str().len() } + /// Whether the buffer contains zero bytes. pub fn is_empty(&self) -> bool { self.as_str().is_empty() } @@ -91,11 +98,15 @@ impl serde::Serialize for &CStr { } } +#[doc(hidden)] #[diagnostic::on_unimplemented(message = "the string literal contains a zero byte")] pub trait ValidCStr {} +#[doc(hidden)] pub struct IsValidCStr; +#[doc(hidden)] impl ValidCStr for IsValidCStr {} +#[doc(hidden)] pub const fn validate_cstr(text: &str) -> Option<&CStr> { let bytes = text.as_bytes(); let mut i = 0; @@ -107,8 +118,20 @@ pub const fn validate_cstr(text: &str) -> Option<&CStr> { } Some(CStr::from_str_unchecked(text)) } +#[doc(hidden)] pub const fn assert_valid_cstr() {} +/// Construct a `'static &CStr`. The validitiy will be verified at compile-time. +/// ``` +/// # use bson::raw::{CStr, cstr}; +/// // A valid literal: +/// let key: &CStr = cstr!("hello"); +/// ``` +/// ```compile_fail +/// # use bson::raw::{CStr, cstr}; +/// // An invalid literal will not compile: +/// let key: &CStr = cstr!("hel\0lo"); +/// ``` #[macro_export] macro_rules! cstr { ($text:literal) => {{ @@ -120,6 +143,11 @@ macro_rules! cstr { } pub use cstr; +/// An owned BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the nul byte. +/// `CString` is to `CStr` as [`String`] is to [`prim@str`]. +/// +/// Like `CStr`, this differs from [`std::ffi::CString`] in that it is required to be valid UTF-8, +/// and does not include the nul terminator in the buffer. #[derive(Clone, Eq, PartialEq, Hash)] #[repr(transparent)] pub struct CString { @@ -149,10 +177,12 @@ impl CString { Self { data } } + /// Consume `self` to return the underlying `String`. pub fn into_string(self) -> String { self.data } + /// View the buffer as a Rust `&str`. pub fn as_str(&self) -> &str { self.as_ref().as_str() } diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index feb4f933..c654727b 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -177,7 +177,7 @@ impl RawDocumentBuf { /// It is a user error to append the same key more than once to the same document, and it may /// result in errors when communicating with MongoDB. /// - /// If the provided key contains an interior null byte, this method will panic. + /// Keys can be a [`&CStr`](crate::raw::CStr) or [`CString`](crate::raw::CString). /// /// Values can be any type that can be converted to either borrowed or owned raw bson data; see /// the documentation for [BindRawBsonRef] for more details. From 1d8f3add20050a910a94171ea123f8fc82c6b9b5 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Thu, 3 Jul 2025 10:20:06 -0400 Subject: [PATCH 16/19] more doc examples --- src/raw/cstr.rs | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index e3976daa..221937d1 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -2,11 +2,30 @@ use core::str; use crate::error::{Error, Result}; +#[allow(rustdoc::invalid_rust_codeblocks)] /// A borrowed BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the nul byte. /// Most conveniently constructed via the [`cstr!`](crate::raw::cstr) macro. /// /// Unlike [`std::ffi::CStr`], this is required to be valid UTF-8, and does not include the nul -/// terminator in the buffer. +/// terminator in the buffer: +/// ``` +/// // std::ffi::CStr accepts invalid UTF-8: +/// let invalid: &std::ffi::CStr = c"\xc3\x28"; +/// ``` +/// ```compile_fail +/// # use bson::raw::cstr; +/// // bson::raw::CStr does not: +/// let invalid: &bson::raw::CStr = cstr!("\xc3\x28"); // will not compile +/// ``` +/// ``` +/// // &str accepts embedded nil characters: +/// let invalid: &str = "foo\0bar"; +/// ``` +/// ```compile_fail +/// # use bson::raw::cstr; +/// // &str accepts embedded nil characters: +/// let invalid: &bson::raw::CStr = cstr!("foo\0bar"); // will not compile +/// ``` #[derive(Debug)] #[repr(transparent)] pub struct CStr { @@ -121,6 +140,7 @@ pub const fn validate_cstr(text: &str) -> Option<&CStr> { #[doc(hidden)] pub const fn assert_valid_cstr() {} +#[allow(rustdoc::invalid_rust_codeblocks)] /// Construct a `'static &CStr`. The validitiy will be verified at compile-time. /// ``` /// # use bson::raw::{CStr, cstr}; @@ -129,7 +149,12 @@ pub const fn assert_valid_cstr() {} /// ``` /// ```compile_fail /// # use bson::raw::{CStr, cstr}; -/// // An invalid literal will not compile: +/// // A literal with invalid UTF-8 will not compile: +/// let key: &CStr = cstr!("\xc3\x28"); +/// ``` +/// ```compile_fail +/// # use bson::raw::{CStr, cstr}; +/// // A literal with an embedded nil will not compile: /// let key: &CStr = cstr!("hel\0lo"); /// ``` #[macro_export] From 14e97c7016051f61ffe23762cb18a8f415e9f908 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Thu, 3 Jul 2025 10:22:55 -0400 Subject: [PATCH 17/19] fix comment --- src/raw/cstr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index 221937d1..bd51bbfc 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -23,7 +23,7 @@ use crate::error::{Error, Result}; /// ``` /// ```compile_fail /// # use bson::raw::cstr; -/// // &str accepts embedded nil characters: +/// // bson::raw::CStr does not: /// let invalid: &bson::raw::CStr = cstr!("foo\0bar"); // will not compile /// ``` #[derive(Debug)] From a4b4d3aaaeabaa13d6e62c8164bc62ebfdb5bee9 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Thu, 3 Jul 2025 10:31:21 -0400 Subject: [PATCH 18/19] more doc updates --- src/raw/cstr.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index bd51bbfc..e8c9264f 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -4,7 +4,8 @@ use crate::error::{Error, Result}; #[allow(rustdoc::invalid_rust_codeblocks)] /// A borrowed BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the nul byte. -/// Most conveniently constructed via the [`cstr!`](crate::raw::cstr) macro. +/// Can be constructed at compile-time via the [`cstr!`](crate::raw::cstr) macro or at run-time via +/// the [`TryFrom`] impl. /// /// Unlike [`std::ffi::CStr`], this is required to be valid UTF-8, and does not include the nul /// terminator in the buffer: @@ -169,7 +170,8 @@ macro_rules! cstr { pub use cstr; /// An owned BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the nul byte. -/// `CString` is to `CStr` as [`String`] is to [`prim@str`]. +/// `CString` is to `CStr` as [`String`] is to [`prim@str`]. Can be constructed from a [`CStr`] via +/// [`ToOwned`]/[`Into`] or from a [`String`] or [`prim@str`] via [`TryFrom`]. /// /// Like `CStr`, this differs from [`std::ffi::CString`] in that it is required to be valid UTF-8, /// and does not include the nul terminator in the buffer. From e6e57ef5a2267befb5202fb73b0b2711b312a13d Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Thu, 3 Jul 2025 10:33:14 -0400 Subject: [PATCH 19/19] one final doc tweak --- src/raw/cstr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs index e8c9264f..842f872d 100644 --- a/src/raw/cstr.rs +++ b/src/raw/cstr.rs @@ -4,8 +4,8 @@ use crate::error::{Error, Result}; #[allow(rustdoc::invalid_rust_codeblocks)] /// A borrowed BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the nul byte. -/// Can be constructed at compile-time via the [`cstr!`](crate::raw::cstr) macro or at run-time via -/// the [`TryFrom`] impl. +/// Can be constructed at compile-time via the [`cstr!`](crate::raw::cstr) macro or at run-time from +/// a [`prim@str`] via [`TryFrom`]. /// /// Unlike [`std::ffi::CStr`], this is required to be valid UTF-8, and does not include the nul /// terminator in the buffer: