mongodb · abr-egn · Jul 3, 2025 · Jun 26, 2025 · Jun 27, 2025 · Jun 27, 2025
@@ -1,4 +1,4 @@
-use bson::{doc, Bson, Decimal128};
+use bson::{cstr, doc, Bson, Decimal128};
 use std::{
     fs,
     io::{Error, ErrorKind},
@@ -64,7 +64,7 @@ fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> {
         "bool": true,
         "date": bson::DateTime::now(),
         "null": Bson::Null,
-        "regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }),
+        "regex": Bson::RegularExpression(bson::Regex { pattern: cstr!("pattern").into(), options: cstr!("i").into() }),
         "int32": 123i32,
         "timestamp": bson::Timestamp { time: 12345, increment: 1 },
         "int64": 123i64,

@@ -3,7 +3,7 @@ use serde_json::json;
 
 use super::AllTypes;
 
-use bson::{doc, Bson, JavaScriptCodeWithScope, RawArrayBuf, RawBson, RawDocumentBuf};
+use bson::{cstr, doc, Bson, JavaScriptCodeWithScope, RawArrayBuf, RawBson, RawDocumentBuf};
 
 use serde::{Deserialize, Serialize};
 
@@ -99,18 +99,18 @@ fn owned_raw_bson() {
     });
 
     let mut doc_buf = RawDocumentBuf::new();
-    doc_buf.append("a", "key").unwrap();
-    doc_buf.append("number", 12).unwrap();
-    doc_buf.append("bool", false).unwrap();
-    doc_buf.append("nu", RawBson::Null).unwrap();
+    doc_buf.append(cstr!("a"), "key");
+    doc_buf.append(cstr!("number"), 12);
+    doc_buf.append(cstr!("bool"), false);
+    doc_buf.append(cstr!("nu"), RawBson::Null);
 
     let mut array_buf = RawArrayBuf::new();
-    array_buf.push(1).unwrap();
-    array_buf.push("string").unwrap();
+    array_buf.push(1);
+    array_buf.push("string");
 
     let mut bson_doc = RawDocumentBuf::new();
-    bson_doc.append("first", true).unwrap();
-    bson_doc.append("second", "string").unwrap();
+    bson_doc.append(cstr!("first"), true);
+    bson_doc.append(cstr!("second"), "string");
 
     let expected = Foo {
         doc_buf,

@@ -18,6 +18,7 @@ use std::{
 };
 
 use bson::{
+    cstr,
     doc,
     oid::ObjectId,
     spec::BinarySubtype,
@@ -835,8 +836,8 @@ fn raw_regex() {
 
     let bytes = bson::serialize_to_vec(&doc! {
         "r": Regex {
-            pattern: "a[b-c]d".to_string(),
-            options: "ab".to_string(),
+            pattern: cstr!("a[b-c]d").into(),
+            options: cstr!("ab").into(),
         },
     })
     .expect("raw_regex");
@@ -927,8 +928,8 @@ impl AllTypes {
         };
         let date = DateTime::now();
         let regex = Regex {
-            pattern: "hello".to_string(),
-            options: "x".to_string(),
+            pattern: cstr!("hello").into(),
+            options: cstr!("x").into(),
         };
         let timestamp = Timestamp {
             time: 123,
@@ -1058,8 +1059,8 @@ fn all_raw_types_rmp() {
             scope: doc! { "x": 1 },
         },
         "regex": Regex {
-            pattern: "pattern".to_string(),
-            options: "opt".to_string()
+            pattern: cstr!("pattern").into(),
+            options: cstr!("opt").into()
         }
     })
     .unwrap();
@@ -1254,24 +1255,22 @@ fn owned_raw_types() {
 
     let f = Foo {
         subdoc: RawDocumentBuf::from_iter([
-            ("a key", RawBson::String("a value".to_string())),
-            ("an objectid", RawBson::ObjectId(oid)),
-            ("a date", RawBson::DateTime(dt)),
+            (cstr!("a key"), RawBson::String("a value".to_string())),
+            (cstr!("an objectid"), RawBson::ObjectId(oid)),
+            (cstr!("a date"), RawBson::DateTime(dt)),
             (
-                "code_w_scope",
+                cstr!("code_w_scope"),
                 RawBson::JavaScriptCodeWithScope(raw_code_w_scope.clone()),
             ),
-            ("decimal128", RawBson::Decimal128(d128)),
-        ])
-        .unwrap(),
+            (cstr!("decimal128"), RawBson::Decimal128(d128)),
+        ]),
         array: RawArrayBuf::from_iter([
             RawBson::String("a string".to_string()),
             RawBson::ObjectId(oid),
             RawBson::DateTime(dt),
             RawBson::JavaScriptCodeWithScope(raw_code_w_scope),
             RawBson::Decimal128(d128),
-        ])
-        .unwrap(),
+        ]),
     };
 
     let expected = doc! {

@@ -31,7 +31,7 @@ use std::{
 use serde_json::{json, Value};
 
 pub use crate::document::Document;
-use crate::{base64, oid, spec::ElementType, Binary, Decimal128};
+use crate::{base64, oid, raw::CString, spec::ElementType, Binary, Decimal128};
 
 /// Possible BSON value types.
 #[derive(Clone, Default, PartialEq)]
@@ -268,6 +268,12 @@ impl From<String> for Bson {
     }
 }
 
+impl From<crate::raw::CString> for Bson {
+    fn from(a: crate::raw::CString) -> Bson {
+        Bson::String(a.into_string())
+    }
+}
+
 impl From<Document> for Bson {
     fn from(a: Document) -> Bson {
         Bson::Document(a)
@@ -480,14 +486,14 @@ impl Bson {
             Bson::Boolean(v) => json!(v),
             Bson::Null => Value::Null,
             Bson::RegularExpression(Regex { pattern, options }) => {
-                let mut chars: Vec<_> = options.chars().collect();
+                let mut chars: Vec<_> = options.as_str().chars().collect();
                 chars.sort_unstable();
 
                 let options: String = chars.into_iter().collect();
 
                 json!({
                     "$regularExpression": {
-                        "pattern": pattern,
+                        "pattern": pattern.into_string(),
                         "options": options,
                     }
                 })
@@ -619,7 +625,7 @@ impl Bson {
                 ref pattern,
                 ref options,
             }) => {
-                let mut chars: Vec<_> = options.chars().collect();
+                let mut chars: Vec<_> = options.as_str().chars().collect();
                 chars.sort_unstable();
 
                 let options: String = chars.into_iter().collect();
@@ -842,7 +848,9 @@ impl Bson {
                 if let Ok(regex) = doc.get_document("$regularExpression") {
                     if let Ok(pattern) = regex.get_str("pattern") {
                         if let Ok(options) = regex.get_str("options") {
-                            return Bson::RegularExpression(Regex::new(pattern, options));
+                            if let Ok(regex) = Regex::from_strings(pattern, options) {
+                                return Bson::RegularExpression(regex);
+                            }
                         }
                     }
                 }
@@ -1147,7 +1155,7 @@ impl Timestamp {
 #[derive(Debug, Clone, Eq, PartialEq, Hash)]
 pub struct Regex {
     /// The regex pattern to match.
-    pub pattern: String,
+    pub pattern: CString,
 
     /// The options for the regex.
     ///
@@ -1156,18 +1164,22 @@ pub struct Regex {
     /// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent,
     /// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match
     /// unicode.
-    pub options: String,
+    pub options: CString,
 }
 
 impl Regex {
-    pub(crate) fn new(pattern: impl AsRef<str>, options: impl AsRef<str>) -> Self {
+    #[cfg(any(test, feature = "serde"))]
+    pub(crate) fn from_strings(
+        pattern: impl AsRef<str>,
+        options: impl AsRef<str>,
+    ) -> crate::error::Result<Self> {
         let mut chars: Vec<_> = options.as_ref().chars().collect();
         chars.sort_unstable();
         let options: String = chars.into_iter().collect();
-        Self {
-            pattern: pattern.as_ref().to_string(),
-            options,
-        }
+        Ok(Self {
+            pattern: pattern.as_ref().to_string().try_into()?,
+            options: options.try_into()?,
+        })
     }
 }
 

@@ -1306,15 +1306,15 @@ impl<'de> serde::de::Deserializer<'de> for &mut RegexAccess<'de> {
             RegexDeserializationStage::Pattern => {
                 self.stage = RegexDeserializationStage::Options;
                 match &self.re {
-                    BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern),
-                    BsonCow::Owned(re) => visitor.visit_str(&re.pattern),
+                    BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern.as_str()),
+                    BsonCow::Owned(re) => visitor.visit_str(re.pattern.as_str()),
                 }
             }
             RegexDeserializationStage::Options => {
                 self.stage = RegexDeserializationStage::Done;
                 match &self.re {
-                    BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options),
-                    BsonCow::Owned(re) => visitor.visit_str(&re.options),
+                    BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options.as_str()),
+                    BsonCow::Owned(re) => visitor.visit_str(re.options.as_str()),
                 }
             }
             RegexDeserializationStage::Done => {

@@ -442,7 +442,10 @@ impl<'de> Visitor<'de> for BsonVisitor {
 
                 "$regularExpression" => {
                     let re = visitor.next_value::<extjson::models::RegexBody>()?;
-                    return Ok(Bson::RegularExpression(Regex::new(re.pattern, re.options)));
+                    return Ok(Bson::RegularExpression(
+                        Regex::from_strings(re.pattern, re.options)
+                            .map_err(serde::de::Error::custom)?,
+                    ));
                 }
 
                 "$dbPointer" => {

@@ -55,7 +55,7 @@ impl TryFrom<serde_json::Map<String, serde_json::Value>> for Bson {
 
         if obj.contains_key("$regularExpression") {
             let regex: models::Regex = serde_json::from_value(obj.into())?;
-            return Ok(regex.parse().into());
+            return Ok(regex.parse()?.into());
         }
 
         if obj.contains_key("$numberInt") {

@@ -122,8 +122,8 @@ pub(crate) struct RegexBody {
 }
 
 impl Regex {
-    pub(crate) fn parse(self) -> crate::Regex {
-        crate::Regex::new(self.body.pattern, self.body.options)
+    pub(crate) fn parse(self) -> crate::error::Result<crate::Regex> {
+        crate::Regex::from_strings(self.body.pattern, self.body.options)
     }
 }
 

@@ -240,12 +240,12 @@ macro_rules! rawbson {
 
     // Finished with trailing comma.
     (@array [$($elems:expr,)*]) => {
-        $crate::RawArrayBuf::from_iter(vec![$($elems,)*]).expect("invalid bson value")
+        $crate::RawArrayBuf::from_iter(vec![$($elems,)*])
     };
 
     // Finished without trailing comma.
     (@array [$($elems:expr),*]) => {
-        $crate::RawArrayBuf::from_iter(vec![$($elems),*]).expect("invalid bson value")
+        $crate::RawArrayBuf::from_iter(vec![$($elems),*])
     };
 
     // Next element is `null`.
@@ -291,15 +291,26 @@ macro_rules! rawbson {
     // Finished.
     (@object $object:ident () () ()) => {};
 
-    // Insert the current entry followed by trailing comma.
-    (@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {
-        $object.append(($($key)+), $value).expect("invalid bson value");
+    // Insert the current entry with followed by trailing comma, with a key literal.
+    (@object $object:ident [$key:literal] ($value:expr) , $($rest:tt)*) => {{
+        $object.append($crate::raw::cstr!($key), $value);
+        $crate::rawbson!(@object $object () ($($rest)*) ($($rest)*));
+    }};
+
+    // Insert the current entry with followed by trailing comma, with a key expression.
+    (@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {{
+        $object.append($($key)+, $value);
         $crate::rawbson!(@object $object () ($($rest)*) ($($rest)*));
+    }};
+
+    // Insert the last entry without trailing comma, with a key literal.
+    (@object $object:ident [$key:literal] ($value:expr)) => {
+        $object.append($crate::raw::cstr!($key), $value);
     };
 
-    // Insert the last entry without trailing comma.
+    // Insert the last entry without trailing comma, with a key expression.
     (@object $object:ident [$($key:tt)+] ($value:expr)) => {
-        $object.append(($($key)+), $value).expect("invalid bson value");
+        $object.append($($key)+, $value);
     };
 
     // Next value is `null`.

@@ -116,6 +116,7 @@ mod array;
 mod array_buf;
 mod bson;
 mod bson_ref;
+mod cstr;
 mod document;
 mod document_buf;
 mod iter;
@@ -142,6 +143,7 @@ pub use self::{
         RawJavaScriptCodeWithScopeRef,
         RawRegexRef,
     },
+    cstr::{assert_valid_cstr, cstr, validate_cstr, CStr, CString, IsValidCStr},
     document::RawDocument,
     document_buf::{BindRawBsonRef, BindValue, RawDocumentBuf},
     iter::{RawElement, RawIter},
@@ -316,15 +318,3 @@ pub(crate) fn write_string(buf: &mut Vec<u8>, s: &str) {
     buf.extend(s.as_bytes());
     buf.push(0);
 }
-
-pub(crate) fn write_cstring(buf: &mut Vec<u8>, s: &str) -> Result<()> {
-    if s.contains('\0') {
-        return Err(Error::malformed_bytes(format!(
-            "cstring with interior null: {:?}",
-            s
-        )));
-    }
-    buf.extend(s.as_bytes());
-    buf.push(0);
-    Ok(())
-}