From a1769f6536cf31a66480621b987fa59d5cd289ba Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Tue, 24 Jun 2025 11:05:12 -0400 Subject: [PATCH 1/6] clean up warning --- src/binary.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/binary.rs b/src/binary.rs index 58416ee1..9c874d96 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -1,10 +1,7 @@ #! Module containing functionality related to BSON binary values. mod vector; -use std::{ - convert::TryFrom, - fmt::{self, Display}, -}; +use std::fmt::{self, Display}; use crate::{ base64, @@ -65,6 +62,8 @@ impl Binary { #[cfg(feature = "serde")] pub(crate) fn from_extended_doc(doc: &crate::Document) -> Option { + use std::convert::TryFrom; + let binary_doc = doc.get_document("$binary").ok()?; if let Ok(bytes) = binary_doc.get_str("base64") { From d902c69514c59b75b6135c978499fba06017dbb2 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Tue, 24 Jun 2025 12:33:51 -0400 Subject: [PATCH 2/6] binary out of ser --- src/de.rs | 2 -- src/raw/bson_ref.rs | 19 +++++++++++ src/raw/document_buf/raw_writer.rs | 20 +---------- src/ser.rs | 53 ------------------------------ src/ser/raw.rs | 30 ++++++++--------- src/ser/raw/document_serializer.rs | 5 +-- src/ser/raw/value_serializer.rs | 33 +++++++++++-------- 7 files changed, 57 insertions(+), 105 deletions(-) diff --git a/src/de.rs b/src/de.rs index 821a4dca..76a9bcda 100644 --- a/src/de.rs +++ b/src/de.rs @@ -46,8 +46,6 @@ pub(crate) use self::serde::{convert_unsigned_to_signed_raw, BsonVisitor}; #[cfg(test)] pub(crate) use self::raw::Deserializer as RawDeserializer; -pub(crate) const MAX_BSON_SIZE: i32 = i32::MAX; - /// Hint provided to the deserializer via `deserialize_newtype_struct` as to the type of thing /// being deserialized. #[derive(Debug, Clone, Copy)] diff --git a/src/raw/bson_ref.rs b/src/raw/bson_ref.rs index 2edceaa7..b026e17e 100644 --- a/src/raw/bson_ref.rs +++ b/src/raw/bson_ref.rs @@ -285,6 +285,25 @@ impl<'a> RawBsonRef<'a> { }), } } + + #[inline] + pub(crate) fn append_to(self, dest: &mut Vec) -> Result<()> { + Ok(match self { + Self::Int32(val) => dest.extend(val.to_le_bytes()), + Self::Int64(val) => dest.extend(val.to_le_bytes()), + Self::Double(val) => dest.extend(val.to_le_bytes()), + Self::Binary(b @ RawBinaryRef { subtype, bytes }) => { + let len = b.len(); + dest.extend(len.to_le_bytes()); + dest.push(subtype.into()); + if let BinarySubtype::BinaryOld = subtype { + dest.extend((len - 4).to_le_bytes()) + } + dest.extend(bytes); + } + _ => todo!(), + }) + } } #[cfg(feature = "serde")] diff --git a/src/raw/document_buf/raw_writer.rs b/src/raw/document_buf/raw_writer.rs index 59edfc82..d1134a6a 100644 --- a/src/raw/document_buf/raw_writer.rs +++ b/src/raw/document_buf/raw_writer.rs @@ -1,6 +1,5 @@ use crate::{ raw::{write_cstring, write_string}, - spec::BinarySubtype, RawBsonRef, }; @@ -20,9 +19,6 @@ impl<'a> RawWriter<'a> { write_cstring(self.data, key)?; match value { - RawBsonRef::Int32(i) => { - self.data.extend(i.to_le_bytes()); - } RawBsonRef::String(s) => { write_string(self.data, s); } @@ -32,15 +28,6 @@ impl<'a> RawWriter<'a> { RawBsonRef::Array(a) => { self.data.extend(a.as_bytes()); } - RawBsonRef::Binary(b) => { - let len = b.len(); - self.data.extend(len.to_le_bytes()); - self.data.push(b.subtype.into()); - if let BinarySubtype::BinaryOld = b.subtype { - self.data.extend((len - 4).to_le_bytes()) - } - self.data.extend(b.bytes); - } RawBsonRef::Boolean(b) => { self.data.push(b as u8); } @@ -54,12 +41,6 @@ impl<'a> RawWriter<'a> { RawBsonRef::Decimal128(d) => { self.data.extend(d.bytes()); } - RawBsonRef::Double(d) => { - self.data.extend(d.to_le_bytes()); - } - RawBsonRef::Int64(i) => { - self.data.extend(i.to_le_bytes()); - } RawBsonRef::RegularExpression(re) => { write_cstring(self.data, re.pattern)?; write_cstring(self.data, re.options)?; @@ -83,6 +64,7 @@ impl<'a> RawWriter<'a> { write_string(self.data, s); } RawBsonRef::Null | RawBsonRef::Undefined | RawBsonRef::MinKey | RawBsonRef::MaxKey => {} + value => value.append_to(self.data)?, } // append trailing null byte diff --git a/src/ser.rs b/src/ser.rs index 43ead226..5d925ab6 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -30,68 +30,15 @@ pub use self::{ serde::Serializer, }; -use std::io::Write; - #[rustfmt::skip] use ::serde::{ser::Error as SerdeError, Serialize}; use crate::{ bson::{Bson, Document}, - de::MAX_BSON_SIZE, ser::serde::SerializerOptions, - spec::BinarySubtype, RawDocumentBuf, }; -#[inline] -pub(crate) fn write_i32(writer: &mut W, val: i32) -> Result<()> { - writer - .write_all(&val.to_le_bytes()) - .map(|_| ()) - .map_err(From::from) -} - -#[inline] -fn write_i64(writer: &mut W, val: i64) -> Result<()> { - writer - .write_all(&val.to_le_bytes()) - .map(|_| ()) - .map_err(From::from) -} - -#[inline] -fn write_f64(writer: &mut W, val: f64) -> Result<()> { - writer - .write_all(&val.to_le_bytes()) - .map(|_| ()) - .map_err(From::from) -} - -#[inline] -fn write_binary(mut writer: W, bytes: &[u8], subtype: BinarySubtype) -> Result<()> { - let len = if let BinarySubtype::BinaryOld = subtype { - bytes.len() + 4 - } else { - bytes.len() - }; - - if len > MAX_BSON_SIZE as usize { - return Err(Error::custom(format!( - "binary length {} exceeded maximum size", - bytes.len() - ))); - } - - write_i32(&mut writer, len as i32)?; - writer.write_all(&[subtype.into()])?; - - if let BinarySubtype::BinaryOld = subtype { - write_i32(&mut writer, len as i32 - 4)?; - }; - - writer.write_all(bytes).map_err(From::from) -} - /// Encode a `T` Serializable into a [`Bson`] value. /// /// The [`Serializer`] used by this function presents itself as human readable, whereas the diff --git a/src/ser/raw.rs b/src/ser/raw.rs index 480fdd46..d35411c2 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -10,13 +10,14 @@ use serde::{ use self::value_serializer::{ValueSerializer, ValueType}; -use super::{write_binary, write_f64, write_i32, write_i64}; use crate::{ raw::{write_cstring, write_string, RAW_ARRAY_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, ser::{Error, Result}, serde_helpers::HUMAN_READABLE_NEWTYPE, spec::{BinarySubtype, ElementType}, uuid::UUID_NEWTYPE_NAME, + RawBinaryRef, + RawBsonRef, }; use document_serializer::DocumentSerializer; @@ -104,6 +105,12 @@ impl Serializer { let portion = &mut self.bytes[at..at + 4]; portion.copy_from_slice(&with.to_le_bytes()); } + + fn serialize_raw(&mut self, v: RawBsonRef) -> Result<()> { + self.update_element_type(v.element_type())?; + v.append_to(&mut self.bytes)?; + Ok(()) + } } impl<'a> serde::Serializer for &'a mut Serializer { @@ -141,16 +148,12 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_i32(self, v: i32) -> Result { - self.update_element_type(ElementType::Int32)?; - write_i32(&mut self.bytes, v)?; - Ok(()) + self.serialize_raw(RawBsonRef::Int32(v)) } #[inline] fn serialize_i64(self, v: i64) -> Result { - self.update_element_type(ElementType::Int64)?; - write_i64(&mut self.bytes, v)?; - Ok(()) + self.serialize_raw(RawBsonRef::Int64(v)) } #[inline] @@ -185,8 +188,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_f64(self, v: f64) -> Result { - self.update_element_type(ElementType::Double)?; - write_f64(&mut self.bytes, v) + self.serialize_raw(RawBsonRef::Double(v)) } #[inline] @@ -215,15 +217,12 @@ impl<'a> serde::Serializer for &'a mut Serializer { self.bytes.write_all(v)?; } hint => { - self.update_element_type(ElementType::Binary)?; - let subtype = if matches!(hint, SerializerHint::Uuid) { BinarySubtype::Uuid } else { BinarySubtype::Generic }; - - write_binary(&mut self.bytes, v, subtype)?; + self.serialize_raw(RawBsonRef::Binary(RawBinaryRef { subtype, bytes: v }))?; } }; Ok(()) @@ -444,7 +443,8 @@ impl<'a> VariantSerializer<'a> { ) -> Result { let doc_start = rs.bytes.len(); // write placeholder length for document, will be updated at end - write_i32(&mut rs.bytes, 0)?; + static ZERO: RawBsonRef = RawBsonRef::Int32(0); + ZERO.append_to(&mut rs.bytes)?; let inner = match inner_type { VariantInnerType::Struct => ElementType::EmbeddedDocument, @@ -454,7 +454,7 @@ impl<'a> VariantSerializer<'a> { write_cstring(&mut rs.bytes, variant)?; let inner_start = rs.bytes.len(); // write placeholder length for inner, will be updated at end - write_i32(&mut rs.bytes, 0)?; + ZERO.append_to(&mut rs.bytes)?; Ok(Self { root_serializer: rs, diff --git a/src/ser/raw/document_serializer.rs b/src/ser/raw/document_serializer.rs index 10e3b93e..55da28d2 100644 --- a/src/ser/raw/document_serializer.rs +++ b/src/ser/raw/document_serializer.rs @@ -2,9 +2,10 @@ use serde::{ser::Impossible, Serialize}; use crate::{ raw::write_cstring, - ser::{write_i32, Error, Result}, + ser::{Error, Result}, serialize_to_bson, Bson, + RawBsonRef, }; use super::Serializer; @@ -23,7 +24,7 @@ pub(crate) struct DocumentSerializer<'a> { impl<'a> DocumentSerializer<'a> { pub(crate) fn start(rs: &'a mut Serializer) -> crate::ser::Result { let start = rs.bytes.len(); - write_i32(&mut rs.bytes, 0)?; + RawBsonRef::Int32(0).append_to(&mut rs.bytes)?; Ok(Self { root_serializer: rs, num_keys_serialized: 0, diff --git a/src/ser/raw/value_serializer.rs b/src/ser/raw/value_serializer.rs index 45a4ff6a..b466256c 100644 --- a/src/ser/raw/value_serializer.rs +++ b/src/ser/raw/value_serializer.rs @@ -9,8 +9,10 @@ use crate::{ base64, oid::ObjectId, raw::{write_cstring, write_string, RAW_DOCUMENT_NEWTYPE}, - ser::{write_binary, write_i32, write_i64, Error, Result}, + ser::{Error, Result}, spec::{BinarySubtype, ElementType}, + RawBinaryRef, + RawBsonRef, RawDocument, RawJavaScriptCodeWithScopeRef, }; @@ -189,11 +191,11 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { Ok(()) } SerializationStep::TimestampIncrement { time } => { - let t = u32::try_from(time).map_err(Error::custom)?; - let i = u32::try_from(v).map_err(Error::custom)?; + let t = RawBsonRef::Int32(u32::try_from(time).map_err(Error::custom)? as i32); + let i = RawBsonRef::Int32(u32::try_from(v).map_err(Error::custom)? as i32); - write_i32(&mut self.root_serializer.bytes, i as i32)?; - write_i32(&mut self.root_serializer.bytes, t as i32)?; + i.append_to(&mut self.root_serializer.bytes)?; + t.append_to(&mut self.root_serializer.bytes)?; Ok(()) } _ => Err(self.invalid_step("i64")), @@ -204,7 +206,11 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { fn serialize_u8(self, v: u8) -> Result { match self.state { SerializationStep::RawBinarySubType { ref bytes } => { - write_binary(&mut self.root_serializer.bytes, bytes.as_slice(), v.into())?; + let binary = RawBinaryRef { + subtype: v.into(), + bytes: &bytes, + }; + RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes)?; self.state = SerializationStep::Done; Ok(()) } @@ -245,8 +251,8 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { fn serialize_str(self, v: &str) -> Result { match &self.state { SerializationStep::DateTimeNumberLong => { - let millis: i64 = v.parse().map_err(Error::custom)?; - write_i64(&mut self.root_serializer.bytes, millis)?; + let millis = RawBsonRef::Int64(v.parse().map_err(Error::custom)?); + millis.append_to(&mut self.root_serializer.bytes)?; } SerializationStep::Oid => { let oid = ObjectId::parse_str(v).map_err(Error::custom)?; @@ -260,10 +266,9 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { SerializationStep::BinarySubType { base64 } => { let subtype_byte = hex::decode(v).map_err(Error::custom)?; let subtype: BinarySubtype = subtype_byte[0].into(); - - let bytes = base64::decode(base64.as_str()).map_err(Error::custom)?; - - write_binary(&mut self.root_serializer.bytes, bytes.as_slice(), subtype)?; + let bytes = &base64::decode(base64.as_str()).map_err(Error::custom)?; + let binary = RawBinaryRef { subtype, bytes }; + RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes)?; } SerializationStep::Symbol | SerializationStep::DbPointerRef => { write_string(&mut self.root_serializer.bytes, v); @@ -313,7 +318,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { code, scope: RawDocument::decode_from_bytes(v).map_err(Error::custom)?, }; - write_i32(&mut self.root_serializer.bytes, raw.len())?; + RawBsonRef::Int32(raw.len()).append_to(&mut self.root_serializer.bytes)?; write_string(&mut self.root_serializer.bytes, code); self.root_serializer.bytes.write_all(v)?; self.state = SerializationStep::Done; @@ -590,7 +595,7 @@ impl<'a> CodeWithScopeSerializer<'a> { #[inline] fn start(code: &str, rs: &'a mut Serializer) -> Result { let start = rs.bytes.len(); - write_i32(&mut rs.bytes, 0)?; // placeholder length + RawBsonRef::Int32(0).append_to(&mut rs.bytes)?; // placeholder length write_string(&mut rs.bytes, code); let doc = DocumentSerializer::start(rs)?; From bbbb5459d995b69381d2483b593e9fc92fb1836a Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Tue, 24 Jun 2025 12:48:37 -0400 Subject: [PATCH 3/6] full append_to --- src/raw/bson_ref.rs | 28 +++++++++++++-- src/raw/document_buf/raw_writer.rs | 55 ++---------------------------- 2 files changed, 28 insertions(+), 55 deletions(-) diff --git a/src/raw/bson_ref.rs b/src/raw/bson_ref.rs index b026e17e..e8ec6dd8 100644 --- a/src/raw/bson_ref.rs +++ b/src/raw/bson_ref.rs @@ -3,7 +3,7 @@ use std::convert::{TryFrom, TryInto}; use super::{bson::RawBson, Error, RawArray, RawDocument, Result}; use crate::{ oid::{self, ObjectId}, - raw::RawJavaScriptCodeWithScope, + raw::{write_cstring, write_string, RawJavaScriptCodeWithScope}, spec::{BinarySubtype, ElementType}, Binary, Bson, @@ -301,7 +301,31 @@ impl<'a> RawBsonRef<'a> { } dest.extend(bytes); } - _ => todo!(), + Self::String(s) => write_string(dest, s), + Self::Array(raw_array) => dest.extend(raw_array.as_bytes()), + Self::Document(raw_document) => dest.extend(raw_document.as_bytes()), + Self::Boolean(b) => dest.push(b as u8), + Self::RegularExpression(re) => { + write_cstring(dest, re.pattern)?; + write_cstring(dest, re.options)?; + } + Self::JavaScriptCode(js) => write_string(dest, js), + Self::JavaScriptCodeWithScope(code_w_scope) => { + let len = code_w_scope.len(); + dest.extend(len.to_le_bytes()); + write_string(dest, code_w_scope.code); + dest.extend(code_w_scope.scope.as_bytes()); + } + Self::Timestamp(ts) => dest.extend(ts.to_le_bytes()), + Self::ObjectId(oid) => dest.extend(oid.bytes()), + Self::DateTime(dt) => dest.extend(dt.timestamp_millis().to_le_bytes()), + Self::Symbol(s) => write_string(dest, s), + Self::Decimal128(d) => dest.extend(d.bytes()), + Self::DbPointer(dbp) => { + write_string(dest, dbp.namespace); + dest.extend(dbp.id.bytes()); + } + Self::Null | Self::Undefined | Self::MinKey | Self::MaxKey => {} }) } } diff --git a/src/raw/document_buf/raw_writer.rs b/src/raw/document_buf/raw_writer.rs index d1134a6a..dc550dbc 100644 --- a/src/raw/document_buf/raw_writer.rs +++ b/src/raw/document_buf/raw_writer.rs @@ -1,7 +1,4 @@ -use crate::{ - raw::{write_cstring, write_string}, - RawBsonRef, -}; +use crate::{raw::write_cstring, RawBsonRef}; pub(super) struct RawWriter<'a> { data: &'a mut Vec, @@ -17,55 +14,7 @@ impl<'a> RawWriter<'a> { self.data[original_len - 1] = value.element_type() as u8; write_cstring(self.data, key)?; - - match value { - RawBsonRef::String(s) => { - write_string(self.data, s); - } - RawBsonRef::Document(d) => { - self.data.extend(d.as_bytes()); - } - RawBsonRef::Array(a) => { - self.data.extend(a.as_bytes()); - } - RawBsonRef::Boolean(b) => { - self.data.push(b as u8); - } - RawBsonRef::DateTime(dt) => { - self.data.extend(dt.timestamp_millis().to_le_bytes()); - } - RawBsonRef::DbPointer(dbp) => { - write_string(self.data, dbp.namespace); - self.data.extend(dbp.id.bytes()); - } - RawBsonRef::Decimal128(d) => { - self.data.extend(d.bytes()); - } - RawBsonRef::RegularExpression(re) => { - write_cstring(self.data, re.pattern)?; - write_cstring(self.data, re.options)?; - } - RawBsonRef::JavaScriptCode(js) => { - write_string(self.data, js); - } - RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => { - let len = code_w_scope.len(); - self.data.extend(len.to_le_bytes()); - write_string(self.data, code_w_scope.code); - self.data.extend(code_w_scope.scope.as_bytes()); - } - RawBsonRef::Timestamp(ts) => { - self.data.extend(ts.to_le_bytes()); - } - RawBsonRef::ObjectId(oid) => { - self.data.extend(oid.bytes()); - } - RawBsonRef::Symbol(s) => { - write_string(self.data, s); - } - RawBsonRef::Null | RawBsonRef::Undefined | RawBsonRef::MinKey | RawBsonRef::MaxKey => {} - value => value.append_to(self.data)?, - } + value.append_to(self.data)?; // append trailing null byte self.data.push(0); From 26a7f3a824fa249947f61aae80b28eebbf75670f Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Tue, 24 Jun 2025 14:38:21 -0400 Subject: [PATCH 4/6] raw serialization --- src/ser/raw.rs | 25 +++++------ src/ser/raw/value_serializer.rs | 74 +++++++++++++++++++++------------ 2 files changed, 57 insertions(+), 42 deletions(-) diff --git a/src/ser/raw.rs b/src/ser/raw.rs index d35411c2..6980423b 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -1,8 +1,6 @@ mod document_serializer; mod value_serializer; -use std::io::Write; - use serde::{ ser::{Error as SerdeError, SerializeMap, SerializeStruct}, Serialize, @@ -11,13 +9,15 @@ use serde::{ use self::value_serializer::{ValueSerializer, ValueType}; use crate::{ - raw::{write_cstring, write_string, RAW_ARRAY_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, + raw::{write_cstring, RAW_ARRAY_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, ser::{Error, Result}, serde_helpers::HUMAN_READABLE_NEWTYPE, spec::{BinarySubtype, ElementType}, uuid::UUID_NEWTYPE_NAME, + RawArray, RawBinaryRef, RawBsonRef, + RawDocument, }; use document_serializer::DocumentSerializer; @@ -131,9 +131,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_bool(self, v: bool) -> Result { - self.update_element_type(ElementType::Boolean)?; - self.bytes.push(v as u8); - Ok(()) + self.serialize_raw(RawBsonRef::Boolean(v)) } #[inline] @@ -200,21 +198,19 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_str(self, v: &str) -> Result { - self.update_element_type(ElementType::String)?; - write_string(&mut self.bytes, v); - Ok(()) + self.serialize_raw(RawBsonRef::String(v)) } #[inline] fn serialize_bytes(self, v: &[u8]) -> Result { match self.hint.take() { SerializerHint::RawDocument => { - self.update_element_type(ElementType::EmbeddedDocument)?; - self.bytes.write_all(v)?; + self.serialize_raw(RawBsonRef::Document(RawDocument::new_unchecked(v)))?; } SerializerHint::RawArray => { - self.update_element_type(ElementType::Array)?; - self.bytes.write_all(v)?; + self.serialize_raw(RawBsonRef::Array(RawArray::from_doc( + RawDocument::new_unchecked(v), + )))?; } hint => { let subtype = if matches!(hint, SerializerHint::Uuid) { @@ -230,8 +226,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_none(self) -> Result { - self.update_element_type(ElementType::Null)?; - Ok(()) + self.serialize_raw(RawBsonRef::Null) } #[inline] diff --git a/src/ser/raw/value_serializer.rs b/src/ser/raw/value_serializer.rs index b466256c..1be30eb5 100644 --- a/src/ser/raw/value_serializer.rs +++ b/src/ser/raw/value_serializer.rs @@ -1,4 +1,4 @@ -use std::{convert::TryFrom, io::Write}; +use std::convert::TryFrom; use serde::{ ser::{Error as SerdeError, Impossible, SerializeMap, SerializeStruct}, @@ -8,7 +8,7 @@ use serde::{ use crate::{ base64, oid::ObjectId, - raw::{write_cstring, write_string, RAW_DOCUMENT_NEWTYPE}, + raw::{write_string, RAW_DOCUMENT_NEWTYPE}, ser::{Error, Result}, spec::{BinarySubtype, ElementType}, RawBinaryRef, @@ -50,7 +50,9 @@ enum SerializationStep { RegEx, RegExPattern, - RegExOptions, + RegExOptions { + pattern: String, + }, Timestamp, TimestampTime, @@ -60,7 +62,9 @@ enum SerializationStep { DbPointer, DbPointerRef, - DbPointerId, + DbPointerId { + ns: String, + }, Code, @@ -191,11 +195,12 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { Ok(()) } SerializationStep::TimestampIncrement { time } => { - let t = RawBsonRef::Int32(u32::try_from(time).map_err(Error::custom)? as i32); - let i = RawBsonRef::Int32(u32::try_from(v).map_err(Error::custom)? as i32); + let time = u32::try_from(time).map_err(Error::custom)?; + let increment = u32::try_from(v).map_err(Error::custom)?; + + RawBsonRef::Timestamp(crate::Timestamp { time, increment }) + .append_to(&mut self.root_serializer.bytes)?; - i.append_to(&mut self.root_serializer.bytes)?; - t.append_to(&mut self.root_serializer.bytes)?; Ok(()) } _ => Err(self.invalid_step("i64")), @@ -251,12 +256,13 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { fn serialize_str(self, v: &str) -> Result { match &self.state { SerializationStep::DateTimeNumberLong => { - let millis = RawBsonRef::Int64(v.parse().map_err(Error::custom)?); - millis.append_to(&mut self.root_serializer.bytes)?; + let millis = v.parse().map_err(Error::custom)?; + RawBsonRef::DateTime(crate::DateTime::from_millis(millis)) + .append_to(&mut self.root_serializer.bytes)?; } SerializationStep::Oid => { let oid = ObjectId::parse_str(v).map_err(Error::custom)?; - self.root_serializer.bytes.write_all(&oid.bytes())?; + RawBsonRef::ObjectId(oid).append_to(&mut self.root_serializer.bytes)?; } SerializationStep::BinaryBytes => { self.state = SerializationStep::BinarySubType { @@ -270,21 +276,35 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { let binary = RawBinaryRef { subtype, bytes }; RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes)?; } - SerializationStep::Symbol | SerializationStep::DbPointerRef => { - write_string(&mut self.root_serializer.bytes, v); + SerializationStep::Symbol => { + RawBsonRef::Symbol(v).append_to(&mut self.root_serializer.bytes)?; + } + SerializationStep::DbPointerRef => { + self.state = SerializationStep::DbPointerId { ns: v.to_owned() }; + } + SerializationStep::DbPointerId { ns } => { + let id = ObjectId::parse_str(v).map_err(Error::custom)?; + RawBsonRef::DbPointer(crate::RawDbPointerRef { namespace: ns, id }) + .append_to(&mut self.root_serializer.bytes)?; } SerializationStep::RegExPattern => { - write_cstring(&mut self.root_serializer.bytes, v)?; + self.state = SerializationStep::RegExOptions { + pattern: v.to_string(), + }; } - SerializationStep::RegExOptions => { + SerializationStep::RegExOptions { pattern } => { let mut chars: Vec<_> = v.chars().collect(); chars.sort_unstable(); let sorted = chars.into_iter().collect::(); - write_cstring(&mut self.root_serializer.bytes, sorted.as_str())?; + RawBsonRef::RegularExpression(crate::RawRegexRef { + pattern: &pattern, + options: &sorted, + }) + .append_to(&mut self.root_serializer.bytes)?; } SerializationStep::Code => { - write_string(&mut self.root_serializer.bytes, v); + RawBsonRef::JavaScriptCode(v).append_to(&mut self.root_serializer.bytes)?; } SerializationStep::CodeWithScopeCode => { self.state = SerializationStep::CodeWithScopeScope { @@ -306,7 +326,8 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { fn serialize_bytes(self, v: &[u8]) -> Result { match self.state { SerializationStep::Decimal128Value => { - self.root_serializer.bytes.write_all(v)?; + let dec = crate::Decimal128::from_bytes(v.try_into().map_err(Error::custom)?); + RawBsonRef::Decimal128(dec).append_to(&mut self.root_serializer.bytes)?; Ok(()) } SerializationStep::BinaryBytes => { @@ -318,9 +339,8 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { code, scope: RawDocument::decode_from_bytes(v).map_err(Error::custom)?, }; - RawBsonRef::Int32(raw.len()).append_to(&mut self.root_serializer.bytes)?; - write_string(&mut self.root_serializer.bytes, code); - self.root_serializer.bytes.write_all(v)?; + RawBsonRef::JavaScriptCodeWithScope(raw) + .append_to(&mut self.root_serializer.bytes)?; self.state = SerializationStep::Done; Ok(()) } @@ -481,7 +501,7 @@ impl SerializeStruct for &mut ValueSerializer<'_> { self.state = SerializationStep::BinaryBytes; value.serialize(&mut **self)?; } - (SerializationStep::BinaryBytes, key) if key == "bytes" || key == "base64" => { + (SerializationStep::BinaryBytes, "bytes" | "base64") => { // state is updated in serialize value.serialize(&mut **self)?; } @@ -502,10 +522,10 @@ impl SerializeStruct for &mut ValueSerializer<'_> { value.serialize(&mut **self)?; } (SerializationStep::RegExPattern, "pattern") => { + // state is updated in serialize value.serialize(&mut **self)?; - self.state = SerializationStep::RegExOptions; } - (SerializationStep::RegExOptions, "options") => { + (SerializationStep::RegExOptions { .. }, "options") => { value.serialize(&mut **self)?; self.state = SerializationStep::Done; } @@ -526,12 +546,12 @@ impl SerializeStruct for &mut ValueSerializer<'_> { value.serialize(&mut **self)?; } (SerializationStep::DbPointerRef, "$ref") => { + // state is updated in serialize value.serialize(&mut **self)?; - self.state = SerializationStep::DbPointerId; } - (SerializationStep::DbPointerId, "$id") => { - self.state = SerializationStep::Oid; + (SerializationStep::DbPointerId { .. }, "$oid" | "$id") => { value.serialize(&mut **self)?; + self.state = SerializationStep::Done; } (SerializationStep::Code, "$code") => { value.serialize(&mut **self)?; From 7512941660f781175752852f965a07bac080a8cd Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Tue, 24 Jun 2025 14:45:37 -0400 Subject: [PATCH 5/6] seeded_visitor --- src/raw/serde/seeded_visitor.rs | 72 +++------------------------------ 1 file changed, 6 insertions(+), 66 deletions(-) diff --git a/src/raw/serde/seeded_visitor.rs b/src/raw/serde/seeded_visitor.rs index 2b5034fa..8361421c 100644 --- a/src/raw/serde/seeded_visitor.rs +++ b/src/raw/serde/seeded_visitor.rs @@ -271,72 +271,12 @@ impl<'de> Visitor<'de> for SeededVisitor<'_, 'de> { Ok(ElementType::Array) } // Cases that don't - _ => match bson.as_ref() { - RawBsonRef::ObjectId(oid) => { - self.buffer.append_bytes(&oid.bytes()); - Ok(ElementType::ObjectId) - } - RawBsonRef::Symbol(s) => { - self.append_string(s); - Ok(ElementType::Symbol) - } - RawBsonRef::Decimal128(d) => { - self.buffer.append_bytes(&d.bytes); - Ok(ElementType::Decimal128) - } - RawBsonRef::RegularExpression(re) => { - self.append_cstring(re.pattern) - .map_err(SerdeError::custom)?; - self.append_cstring(re.options) - .map_err(SerdeError::custom)?; - Ok(ElementType::RegularExpression) - } - RawBsonRef::Undefined => Ok(ElementType::Undefined), - RawBsonRef::DateTime(dt) => { - self.buffer - .append_bytes(&dt.timestamp_millis().to_le_bytes()); - Ok(ElementType::DateTime) - } - RawBsonRef::Timestamp(ts) => { - self.buffer.append_bytes(&ts.increment.to_le_bytes()); - self.buffer.append_bytes(&ts.time.to_le_bytes()); - Ok(ElementType::Timestamp) - } - RawBsonRef::MinKey => Ok(ElementType::MinKey), - RawBsonRef::MaxKey => Ok(ElementType::MaxKey), - RawBsonRef::JavaScriptCode(s) => { - self.append_string(s); - Ok(ElementType::JavaScriptCode) - } - RawBsonRef::JavaScriptCodeWithScope(jsc) => { - let length_index = self.pad_document_length(); - self.append_string(jsc.code); - self.buffer.append_bytes(jsc.scope.as_bytes()); - - let length_bytes = - ((self.buffer.len() - length_index) as i32).to_le_bytes(); - self.buffer - .copy_from_slice(length_index..length_index + 4, &length_bytes); - - Ok(ElementType::JavaScriptCodeWithScope) - } - RawBsonRef::DbPointer(dbp) => { - self.append_string(dbp.namespace); - self.buffer.append_bytes(&dbp.id.bytes()); - Ok(ElementType::DbPointer) - } - RawBsonRef::Double(d) => self.visit_f64(d), - RawBsonRef::String(s) => self.visit_str(s), - RawBsonRef::Boolean(b) => self.visit_bool(b), - RawBsonRef::Null => self.visit_none(), - RawBsonRef::Int32(i) => self.visit_i32(i), - RawBsonRef::Int64(i) => self.visit_i64(i), - // These are always borrowed and are handled - // at the top of the outer `match`. - RawBsonRef::Array(_) | RawBsonRef::Document(_) | RawBsonRef::Binary(_) => { - unreachable!() - } - }, + _ => { + let bson = bson.as_ref(); + bson.append_to(self.buffer.get_owned_buffer()) + .map_err(A::Error::custom)?; + Ok(bson.element_type()) + } } } MapParse::Aggregate(first_key) => { From bacbe4dc2a10438e7b247eb97af3612013b7e3a9 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Tue, 24 Jun 2025 15:13:37 -0400 Subject: [PATCH 6/6] clippy --- src/raw/bson_ref.rs | 5 +++-- src/ser/raw/value_serializer.rs | 11 ++++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/raw/bson_ref.rs b/src/raw/bson_ref.rs index e8ec6dd8..8a72fed5 100644 --- a/src/raw/bson_ref.rs +++ b/src/raw/bson_ref.rs @@ -288,7 +288,7 @@ impl<'a> RawBsonRef<'a> { #[inline] pub(crate) fn append_to(self, dest: &mut Vec) -> Result<()> { - Ok(match self { + match self { Self::Int32(val) => dest.extend(val.to_le_bytes()), Self::Int64(val) => dest.extend(val.to_le_bytes()), Self::Double(val) => dest.extend(val.to_le_bytes()), @@ -326,7 +326,8 @@ impl<'a> RawBsonRef<'a> { dest.extend(dbp.id.bytes()); } Self::Null | Self::Undefined | Self::MinKey | Self::MaxKey => {} - }) + } + Ok(()) } } diff --git a/src/ser/raw/value_serializer.rs b/src/ser/raw/value_serializer.rs index 1be30eb5..c57aa44f 100644 --- a/src/ser/raw/value_serializer.rs +++ b/src/ser/raw/value_serializer.rs @@ -213,7 +213,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { SerializationStep::RawBinarySubType { ref bytes } => { let binary = RawBinaryRef { subtype: v.into(), - bytes: &bytes, + bytes, }; RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes)?; self.state = SerializationStep::Done; @@ -296,12 +296,9 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { let mut chars: Vec<_> = v.chars().collect(); chars.sort_unstable(); - let sorted = chars.into_iter().collect::(); - RawBsonRef::RegularExpression(crate::RawRegexRef { - pattern: &pattern, - options: &sorted, - }) - .append_to(&mut self.root_serializer.bytes)?; + let options = &chars.into_iter().collect::(); + RawBsonRef::RegularExpression(crate::RawRegexRef { pattern, options }) + .append_to(&mut self.root_serializer.bytes)?; } SerializationStep::Code => { RawBsonRef::JavaScriptCode(v).append_to(&mut self.root_serializer.bytes)?;