From 6403c4adb2e5b9c8e5982708b2f81e4021cc2c4d Mon Sep 17 00:00:00 2001 From: "J. Cliff Dyer" Date: Sun, 7 Feb 2021 10:41:31 -0500 Subject: [PATCH 01/48] RUST-284 Add raw types for more efficient usage of BSON --- Cargo.toml | 4 +- src/lib.rs | 1 + src/raw/de.rs | 1074 +++++++++++++++++++++++ src/raw/de/binary.rs | 201 +++++ src/raw/de/datetime.rs | 162 ++++ src/raw/de/js.rs | 229 +++++ src/raw/de/object_id.rs | 146 ++++ src/raw/de/regex.rs | 209 +++++ src/raw/elem.rs | 382 ++++++++ src/raw/mod.rs | 1849 +++++++++++++++++++++++++++++++++++++++ src/raw/props.rs | 59 ++ 11 files changed, 4315 insertions(+), 1 deletion(-) create mode 100644 src/raw/de.rs create mode 100644 src/raw/de/binary.rs create mode 100644 src/raw/de/datetime.rs create mode 100644 src/raw/de/js.rs create mode 100644 src/raw/de/object_id.rs create mode 100644 src/raw/de/regex.rs create mode 100644 src/raw/elem.rs create mode 100644 src/raw/mod.rs create mode 100644 src/raw/props.rs diff --git a/Cargo.toml b/Cargo.toml index b653b668..3a35ff53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,8 +55,10 @@ serde_bytes = "0.11.5" [dev-dependencies] assert_matches = "1.2" -serde_bytes = "0.11" +criterion = "0.3.0" pretty_assertions = "0.6.1" +proptest = "0.10" +serde_bytes = "0.11" chrono = { version = "0.4", features = ["serde"] } [package.metadata.docs.rs] diff --git a/src/lib.rs b/src/lib.rs index 6795bef0..d7aca074 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -293,6 +293,7 @@ pub mod decimal128; pub mod document; pub mod extjson; pub mod oid; +pub mod raw; pub mod ser; pub mod serde_helpers; pub mod spec; diff --git a/src/raw/de.rs b/src/raw/de.rs new file mode 100644 index 00000000..8c58b8c3 --- /dev/null +++ b/src/raw/de.rs @@ -0,0 +1,1074 @@ +use serde::de::{self, DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}; +use serde::forward_to_deserialize_any; +use serde::Deserialize; + +use std::convert::TryInto; +use std::fmt::Debug; +use std::num::TryFromIntError; + +use 
crate::raw::{elem::Element, ArrayIter, Doc, DocBuf, DocIter, RawError}; +use crate::spec::ElementType; + +use object_id::RawObjectIdDeserializer; + +pub mod binary; +pub mod datetime; +pub mod js; +pub mod object_id; +pub mod regex; + +#[derive(Debug)] +pub enum Error { + Eof, + TrailingData(Vec), + EncodingError, + MalformedDocument, + UnexpectedType, + Unimplemented, + IntConversion(TryFromIntError), + Internal(String), + NotFound, + TmPErroR, +} + +impl From for Error { + fn from(err: TryFromIntError) -> Error { + Error::IntConversion(err) + } +} + +impl std::error::Error for Error {} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +impl de::Error for Error { + fn custom(err: T) -> Error { + Error::Internal(format!("{}", err)) + } +} + +impl From for Error { + fn from(val: RawError) -> Error { + match val { + RawError::Utf8EncodingError(_) => Error::EncodingError, + RawError::UnexpectedType => Error::UnexpectedType, + RawError::MalformedValue(_) => Error::MalformedDocument, + } + } +} + +pub struct BsonDeserializer<'de> { + bson: Element<'de>, +} + +impl<'de> BsonDeserializer<'de> { + #[deprecated(since = "0.2.0", note = "use from_doc(&docref) instead")] + pub fn from_docref(doc: &'de Doc) -> Self { + BsonDeserializer::from_rawbson(Element::new(ElementType::EmbeddedDocument, doc.as_bytes())) + } + + pub fn from_doc(doc: &'de Doc) -> Self { + BsonDeserializer::from_rawbson(Element::new(ElementType::EmbeddedDocument, doc.as_bytes())) + } + + pub fn from_rawbson(bson: Element<'de>) -> Self { + BsonDeserializer { bson } + } +} + +#[deprecated(since = "0.2.0", note = "use from_doc(&docbuf) instead")] +pub fn from_docbuf<'de, T>(rawdoc_buf: &'de DocBuf) -> Result +where + T: Deserialize<'de>, +{ + from_doc(rawdoc_buf) +} + +pub fn from_doc<'de, T>(rawdoc: &'de Doc) -> Result +where + T: Deserialize<'de>, +{ + let mut de = BsonDeserializer::from_doc(rawdoc); + 
T::deserialize(&mut de) +} + +#[deprecated(since = "0.2.0", note = "use from_doc(&docref) instead")] +pub fn from_docref<'de, T>(rawdoc: &'de Doc) -> Result +where + T: Deserialize<'de>, +{ + from_doc(rawdoc) +} + +pub fn from_bytes<'de, T: 'de>(data: &'de [u8]) -> Result +where + T: Deserialize<'de>, +{ + let raw_document = Doc::new(data)?; + from_doc(raw_document) +} + +impl<'a, 'de: 'a> Deserializer<'de> for &'a mut BsonDeserializer<'de> { + type Error = Error; + + fn deserialize_any>(self, visitor: V) -> Result { + match self.bson.element_type() { + ElementType::Double => self.deserialize_f64(visitor), + ElementType::String => self.deserialize_str(visitor), + ElementType::EmbeddedDocument => self.deserialize_map(visitor), + ElementType::Array => self.deserialize_seq(visitor), + ElementType::Binary => self.deserialize_bytes(visitor), + ElementType::Undefined => self.deserialize_unit(visitor), + ElementType::ObjectId => { + self.deserialize_struct(object_id::NAME, object_id::FIELDS, visitor) + } + ElementType::Boolean => self.deserialize_bool(visitor), + ElementType::DateTime => { + self.deserialize_struct(datetime::NAME, datetime::FIELDS, visitor) + } + ElementType::Null => self.deserialize_unit(visitor), + ElementType::DbPointer => Err(Error::Unimplemented), // deserialize (&str, ObjectId), or struct + ElementType::RegularExpression => { + self.deserialize_struct(regex::NAME, regex::FIELDS, visitor) + } + ElementType::JavaScriptCode => self.deserialize_str(visitor), + ElementType::Symbol => self.deserialize_str(visitor), + ElementType::JavaScriptCodeWithScope => { + self.deserialize_struct(js::WITH_SCOPE_NAME, js::WITH_SCOPE_FIELDS, visitor) + } // deserialize (&'str, Map) or struct + ElementType::Int32 => self.deserialize_i32(visitor), + ElementType::Timestamp => self.deserialize_u64(visitor), + ElementType::Int64 => self.deserialize_i64(visitor), + ElementType::MinKey => self.deserialize_unit(visitor), + ElementType::MaxKey => self.deserialize_unit(visitor), 
+ ElementType::Decimal128 => self.deserialize_i128(visitor), + } + } + + fn deserialize_bool>(self, visitor: V) -> Result { + visitor.visit_bool(self.bson.as_bool()?) + } + + #[cfg(feature = "u2i")] + fn deserialize_u8>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Int32 => self.bson.as_i32()?.try_into()?, + ElementType::Int64 => self.bson.as_i64()?.try_into()?, + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_u8(val) + } + + fn deserialize_i8>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Int32 => self.bson.as_i32()?.try_into()?, + ElementType::Int64 => self.bson.as_i64()?.try_into()?, + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_i8(val) + } + + #[cfg(feature = "u2i")] + fn deserialize_u16>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Int32 => self.bson.as_i32()?.try_into()?, + ElementType::Int64 => self.bson.as_i64()?.try_into()?, + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_u16(val) + } + fn deserialize_i16>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Int32 => self.bson.as_i32()?.try_into()?, + ElementType::Int64 => self.bson.as_i64()?.try_into()?, + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_i16(val) + } + + #[cfg(feature = "u2i")] + fn deserialize_u32>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Int32 => self.bson.as_i32()?.try_into()?, + ElementType::Int64 => self.bson.as_i64()?.try_into()?, + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_u32(val) + } + + fn deserialize_i32>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Int32 => self.bson.as_i32()?, + ElementType::Int64 => self.bson.as_i64()?.try_into()?, + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_i32(val) + } + + #[cfg(feature = "u2i")] + fn 
deserialize_u64>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Int32 => self.bson.as_i32()?.try_into()?, + ElementType::Int64 => self.bson.as_i64()?.try_into()?, + ElementType::Timestamp => self.bson.as_timestamp()?, + ElementType::DateTime => self + .bson + .as_utc_date_time()? + .timestamp_millis() + .try_into()?, + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_u64(val) + } + + fn deserialize_i64>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Int32 => self.bson.as_i32()?.into(), + ElementType::Int64 => self.bson.as_i64()?, + ElementType::DateTime => self.bson.as_datetime()?.timestamp_millis(), + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_i64(val) + } + + fn deserialize_i128>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Int32 => self.bson.as_i32()?.into(), + ElementType::Int64 => self.bson.as_i64()?.into(), + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_i128(val) + } + + #[cfg(feature = "u2i")] + fn deserialize_u128>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Int32 => self.bson.as_i32()?.try_into()?, + ElementType::Int64 => self.bson.as_i64()?.try_into()?, + ElementType::Timestamp => self.bson.as_timestamp()?.into(), + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_u128(val) + } + + #[cfg(not(feature = "u2i"))] + fn deserialize_u8>(self, _visitor: V) -> Result { + Err(Error::MalformedDocument) + } + + #[cfg(not(feature = "u2i"))] + fn deserialize_u16>(self, _visitor: V) -> Result { + Err(Error::MalformedDocument) + } + + #[cfg(not(feature = "u2i"))] + fn deserialize_u32>(self, _visitor: V) -> Result { + Err(Error::MalformedDocument) + } + + #[cfg(not(feature = "u2i"))] + fn deserialize_u64>(self, visitor: V) -> Result { + let val = match self.bson.element_type() { + ElementType::Timestamp => self.bson.as_timestamp()?.time() 
as u64, // TODO: Proper Timestamp handling + ElementType::Int64 => self.bson.as_i64()?.try_into()?, + _ => return Err(Error::UnexpectedType), + }; + visitor.visit_u64(val) + } + + fn deserialize_f32>(self, visitor: V) -> Result { + visitor.visit_f64(self.bson.as_f64()?) + } + + fn deserialize_f64>(self, visitor: V) -> Result { + visitor.visit_f64(self.bson.as_f64()?) + } + + fn deserialize_char>(self, visitor: V) -> Result { + let s = self.bson.as_str()?; + let mut chars = s.chars(); + let char = match chars.next() { + Some(char) => char, + None => return Err(Error::UnexpectedType), + }; + if chars.next().is_none() { + visitor.visit_char(char) + } else { + // Got multiple characters. + Err(Error::UnexpectedType) + } + } + + fn deserialize_str>(self, visitor: V) -> Result { + match self.bson.element_type() { + ElementType::String => visitor.visit_borrowed_str(self.bson.as_str()?), + ElementType::JavaScriptCode => visitor.visit_borrowed_str(self.bson.as_javascript()?), + ElementType::Symbol => visitor.visit_borrowed_str(self.bson.as_symbol()?), + + _ => Err(Error::MalformedDocument), + } + } + + fn deserialize_string>(self, visitor: V) -> Result { + match self.bson.element_type() { + ElementType::String => visitor.visit_str(self.bson.as_str()?), + ElementType::JavaScriptCode => visitor.visit_str(self.bson.as_javascript()?), + ElementType::Symbol => visitor.visit_str(self.bson.as_symbol()?), + ElementType::ObjectId => visitor.visit_str(&self.bson.as_object_id()?.to_hex()), + _ => Err(Error::Unimplemented), + } + } + + fn deserialize_bytes>(self, visitor: V) -> Result { + match self.bson.element_type() { + ElementType::String => { + let raw_data = self.bson.as_bytes(); + let len = i32::from_le_bytes(raw_data[0..4].try_into().expect("i32 needs 4 bytes")); + assert_eq!(raw_data.len(), len as usize + 4); + visitor.visit_borrowed_bytes(&raw_data[4..]) + } + ElementType::Binary => { + let binary = self.bson.as_binary().expect("was not binary"); + let deserializer = 
binary::BinaryDeserializer::new(binary); + deserializer.deserialize_bytes(visitor) + } + ElementType::Symbol => { + let raw_data = self.bson.as_bytes(); + let len = i32::from_le_bytes(raw_data[0..4].try_into().expect("i32 needs 4 bytes")); + assert_eq!(raw_data.len(), len as usize + 4); + visitor.visit_borrowed_bytes(&raw_data[4..]) + } + ElementType::ObjectId => visitor.visit_borrowed_bytes(self.bson.as_bytes()), + _ => Err(Error::MalformedDocument), + } + } + + fn deserialize_byte_buf>(self, visitor: V) -> Result { + match self.bson.element_type() { + ElementType::String => { + let raw_data = self.bson.as_bytes(); + let len = i32::from_le_bytes(raw_data[0..4].try_into().expect("i32 needs 4 bytes")); + assert_eq!(raw_data.len(), len as usize + 4); + visitor.visit_bytes(&raw_data[4..]) + } + ElementType::Binary => { + let binary = self.bson.as_binary()?; + let deserializer = binary::BinaryDeserializer::new(binary); + deserializer.deserialize_byte_buf(visitor) + } + ElementType::Symbol => { + let raw_data = self.bson.as_bytes(); + let len = i32::from_le_bytes(raw_data[0..4].try_into().expect("i32 needs 4 bytes")); + assert_eq!(raw_data.len(), len as usize + 4); + visitor.visit_bytes(&raw_data[4..]) + } + ElementType::ObjectId => visitor.visit_bytes(self.bson.as_bytes()), + _ => Err(Error::MalformedDocument), + } + } + + fn deserialize_option>(self, visitor: V) -> Result { + match self.bson.element_type() { + ElementType::Null => visitor.visit_none(), + _ => visitor.visit_some(self), + } + } + + fn deserialize_unit>(self, visitor: V) -> Result { + match self.bson.element_type() { + ElementType::Null => visitor.visit_unit(), + _ => Err(Error::MalformedDocument), + } + } + + fn deserialize_unit_struct>( + self, + _name: &str, + visitor: V, + ) -> Result { + self.deserialize_unit(visitor) + } + + fn deserialize_newtype_struct>( + self, + _name: &str, + visitor: V, + ) -> Result { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq>(self, visitor: V) -> 
Result { + match self.bson.element_type() { + ElementType::Array => { + let arr = self.bson.as_array()?; + let sequencer = BsonArraySequencer::new(arr.into_iter()); + visitor.visit_seq(sequencer) + } + ElementType::ObjectId => self.deserialize_byte_buf(visitor), + _ => Err::(Error::Unimplemented), + } + } + + fn deserialize_map>(self, visitor: V) -> Result { + match self.bson.element_type() { + ElementType::EmbeddedDocument => { + let doc = self.bson.as_document()?; + let mapper = BsonDocumentMap::new(doc.into_iter()); + visitor.visit_map(mapper) + } + ElementType::ObjectId => { + let mapper = RawObjectIdDeserializer::new(self.bson); + visitor.visit_map(mapper) + } + et => { + println!("Map of {:?}", et); + Err(Error::TmPErroR) + } + } + } + + fn deserialize_tuple>( + self, + len: usize, + visitor: V, + ) -> Result { + match self.bson.element_type() { + ElementType::Array => self.deserialize_seq(visitor), + ElementType::JavaScriptCodeWithScope => { + js::JavaScriptWithScopeDeserializer::new(self.bson.as_javascript_with_scope()?) 
+ .deserialize_tuple(len, visitor) + } + ElementType::RegularExpression => { + regex::RegexDeserializer::new(self.bson.as_regex()?).deserialize_tuple(len, visitor) + } + + _ => Err(Error::TmPErroR), + } + } + + fn deserialize_tuple_struct>( + self, + _name: &str, + len: usize, + visitor: V, + ) -> Result { + self.deserialize_tuple(len, visitor) + } + + fn deserialize_struct>( + self, + name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result { + if name == object_id::NAME { + object_id::RawObjectIdDeserializer::new(self.bson) + .deserialize_struct(name, fields, visitor) + } else if name == binary::NAME { + self.bson + .as_binary() + .map_err(Error::from) + .map(binary::BinaryDeserializer::new) + .and_then(|de| de.deserialize_struct(name, fields, visitor)) + } else if name == datetime::NAME { + self.bson + .as_datetime() + .map_err(Error::from) + .map(|dt| dt.timestamp_millis()) + .map(datetime::DateTimeDeserializer::new) + .and_then(|de| de.deserialize_struct(name, fields, visitor)) + } else if name == js::WITH_SCOPE_NAME { + self.bson + .as_javascript_with_scope() + .map_err(Error::from) + .map(js::JavaScriptWithScopeDeserializer::new) + .and_then(|de| de.deserialize_struct(name, fields, visitor)) + } else if name == regex::NAME { + self.bson + .as_regex() + .map_err(Error::from) + .map(regex::RegexDeserializer::new) + .and_then(|de| de.deserialize_struct(name, fields, visitor)) + } else { + self.deserialize_map(visitor) + } + } + + fn deserialize_enum>( + self, + _name: &str, + _fields: &[&str], + _visitor: V, + ) -> Result { + Err(Error::Unimplemented) + } + + fn deserialize_ignored_any>(self, visitor: V) -> Result { + visitor.visit_unit() + } + + fn deserialize_identifier>(self, visitor: V) -> Result { + self.deserialize_str(visitor) + } +} + +struct BsonArraySequencer<'de> { + arr_iter: ArrayIter<'de>, +} + +impl<'de> BsonArraySequencer<'de> { + fn new(arr_iter: ArrayIter<'de>) -> Self { + BsonArraySequencer { arr_iter } + } +} + 
+impl<'de> SeqAccess<'de> for BsonArraySequencer<'de> { + type Error = Error; + + fn next_element_seed(&mut self, seed: E) -> Result, Self::Error> + where + E: DeserializeSeed<'de>, + { + match self.arr_iter.next() { + Some(Ok(bson)) => { + let mut deserializer = BsonDeserializer::from_rawbson(bson); + seed.deserialize(&mut deserializer).map(Some) + } + Some(Err(err)) => Err(err.into()), + None => Ok(None), + } + } +} + +struct BsonDocumentMap<'de> { + doc_iter: DocIter<'de>, + next: Option>, +} + +impl<'de> BsonDocumentMap<'de> { + fn new(doc_iter: DocIter<'de>) -> Self { + BsonDocumentMap { + doc_iter, + next: None, + } + } +} + +impl<'de> MapAccess<'de> for BsonDocumentMap<'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result, Error> + where + K: DeserializeSeed<'de>, + { + match self.doc_iter.next() { + Some(Ok((key, value))) => { + self.next = Some(value); + let deserializer = StrDeserializer::new(key); + Ok(Some(seed.deserialize(deserializer)?)) + } + Some(Err(err)) => Err(err.into()), + None => Ok(None), + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + let bson = self.next.take().ok_or(Error::Eof)?; + let mut deserializer = BsonDeserializer::from_rawbson(bson); + seed.deserialize(&mut deserializer) + } +} + +struct StrDeserializer<'a> { + value: &'a str, +} + +impl<'a> StrDeserializer<'a> { + fn new(value: &'a str) -> StrDeserializer<'a> { + StrDeserializer { value } + } +} + +impl<'de> Deserializer<'de> for StrDeserializer<'de> { + type Error = Error; + + fn deserialize_any>(self, visitor: V) -> Result { + visitor.visit_borrowed_str(self.value) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use crate::{doc, oid::ObjectId, Bson, DateTime}; 
+ use crate::{spec::BinarySubtype, Binary, JavaScriptCodeWithScope}; + use chrono::Utc; + use serde::Deserialize; + + use crate::raw::{Doc, DocBuf}; + use super::{from_bytes, from_doc}; + + mod uuid { + use std::convert::TryInto; + use std::fmt; + + use serde::de::Visitor; + use serde::de::{Deserialize, MapAccess}; + use serde::Deserializer; + + use crate::spec::BinarySubtype; + + #[derive(Clone, Debug, Eq, PartialEq)] + pub(super) struct Uuid { + data: Vec, + } + + impl Uuid { + pub fn new(data: Vec) -> Uuid { + Uuid { data } + } + } + + impl<'de> Deserialize<'de> for Uuid { + fn deserialize(deserializer: D) -> Result>::Error> + where + D: Deserializer<'de>, + { + struct UuidVisitor; + + impl<'de> Visitor<'de> for UuidVisitor { + type Value = Uuid; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("a bson uuid") + } + + fn visit_map(self, mut map: M) -> Result + where + M: MapAccess<'de>, + { + let subtype_key = map.next_key::()?; + if subtype_key.map(|dk| dk.key) != Some(super::super::binary::SUBTYPE_FIELD) + { + return Err(serde::de::Error::custom( + "BinarySubtypeKey not found in synthesized struct", + )); + } + + let subtype_value: BinarySubtypeFromU8 = map.next_value()?; + match subtype_value.subtype { + BinarySubtype::Uuid | BinarySubtype::UuidOld => {} + _ => { + return Err(serde::de::Error::custom( + "Expected binary subtype of Uuid (4) or UuidOld (3)", + )) + } + } + + let data_key = map.next_key::()?; + + if data_key.map(|dk| dk.key) != Some(super::super::binary::DATA_FIELD) { + return Err(serde::de::Error::custom( + "BinaryDataKey not found in synthesized struct", + )); + } + let data_value: BinaryDataFromBytes = map.next_value()?; + Ok(Uuid { + data: data_value.data, + }) + } + } + static FIELDS: [&str; 2] = [ + super::super::binary::SUBTYPE_FIELD, + super::super::binary::DATA_FIELD, + ]; + deserializer.deserialize_struct(super::super::binary::NAME, &FIELDS, UuidVisitor) + } + } + + struct FieldKey { + 
key: &'static str, + } + + impl FieldKey { + fn new(key: &'static str) -> FieldKey { + FieldKey { key } + } + } + + impl<'de> Deserialize<'de> for FieldKey { + fn deserialize(deserializer: D) -> Result>::Error> + where + D: Deserializer<'de>, + { + struct KeyVisitor; + + impl<'de> Visitor<'de> for KeyVisitor { + type Value = FieldKey; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("an identifier") + } + + fn visit_str(self, s: &str) -> Result { + use super::super::binary::{DATA_FIELD, SUBTYPE_FIELD}; + if s == SUBTYPE_FIELD { + Ok(FieldKey::new(SUBTYPE_FIELD)) + } else if s == DATA_FIELD { + Ok(FieldKey::new(DATA_FIELD)) + } else { + Err(serde::de::Error::custom(format!("unexpected field: {}", s))) + } + } + } + + deserializer.deserialize_identifier(KeyVisitor) + } + } + + struct BinarySubtypeFromU8 { + subtype: BinarySubtype, + } + + impl BinarySubtypeFromU8 { + fn new(subtype_byte: u8) -> BinarySubtypeFromU8 { + let subtype = BinarySubtype::from(subtype_byte); + BinarySubtypeFromU8 { subtype } + } + } + + impl<'de> Deserialize<'de> for BinarySubtypeFromU8 { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct SubtypeVisitor; + + impl<'de> Visitor<'de> for SubtypeVisitor { + type Value = BinarySubtypeFromU8; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a u8 representing a binary subtype") + } + + fn visit_u8( + self, + byte: u8, + ) -> Result { + Ok(BinarySubtypeFromU8::new(byte)) + } + fn visit_i32( + self, + int: i32, + ) -> Result { + Ok(BinarySubtypeFromU8::new( + int.try_into().map_err(|_| E::custom("non-byte integer"))?, + )) + } + } + + deserializer.deserialize_u8(SubtypeVisitor) + } + } + + struct BinaryDataFromBytes { + data: Vec, + } + + impl BinaryDataFromBytes { + fn new(data: Vec) -> BinaryDataFromBytes { + BinaryDataFromBytes { data } + } + } + + impl<'de> Deserialize<'de> for BinaryDataFromBytes { + fn 
deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct DataVisitor; + + impl<'de> Visitor<'de> for DataVisitor { + type Value = BinaryDataFromBytes; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("bytes") + } + + fn visit_bytes( + self, + bytes: &[u8], + ) -> Result { + Ok(BinaryDataFromBytes::new(bytes.to_vec())) + } + } + + deserializer.deserialize_bytes(DataVisitor) + } + } + } + + #[derive(Debug, Deserialize)] + struct Person<'a> { + #[serde(rename = "_id")] + id: ObjectId, + first_name: &'a str, + middle_name: Option, + last_name: String, + number: &'a [u8], + gid: uuid::Uuid, + has_cookies: bool, + birth_year: Option, + } + + #[test] + fn deserialize_struct() { + let mut docbytes = Vec::new(); + let doc = doc! { + "_id": ObjectId::with_string("abcdefabcdefabcdefabcdef").unwrap(), + "first_name": "Edward", + "middle_name": Bson::Null, + "last_name": "Teach", + "number": Binary { subtype: BinarySubtype::Generic, bytes: vec![8, 6, 7, 5, 3, 0, 9] }, + "has_cookies": false, + "gid": Binary { subtype: BinarySubtype::Uuid, bytes: b"12345678901234567890123456789012".to_vec() }, + "birth_year": 15.0, + }; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + let p: Person = from_bytes(&docbytes).expect("could not decode into Person struct"); + assert_eq!(p.first_name, "Edward"); + assert_eq!(p.middle_name, None); + assert_eq!(p.last_name, "Teach"); + assert_eq!(p.id.to_hex(), "abcdefabcdefabcdefabcdef"); + assert_eq!(p.number, &[8, 6, 7, 5, 3, 0, 9]); + assert_eq!(p.has_cookies, false); + assert_eq!( + p.gid, + uuid::Uuid::new(b"12345678901234567890123456789012".to_vec()) + ); + assert_eq!(p.birth_year, Some(15.0)); + } + + #[test] + fn object_id() { + let object_id = ObjectId::new(); + let doc = doc! 
{ + "oid": object_id.clone(), + }; + let mut docbytes = Vec::new(); + doc.to_writer(&mut docbytes) + .expect("cannot serialize document"); + let as_object: HashMap = + from_bytes(&docbytes).expect("deserialize object_id"); + assert_eq!(as_object.get("oid").unwrap(), &object_id); + let as_string: HashMap = from_bytes(&docbytes).expect("deserialize string"); + assert_eq!(as_string.get("oid").unwrap(), &object_id.to_hex()); + let as_bytes: HashMap = + from_bytes(&docbytes).expect("deserialize borrowed bytes"); + assert_eq!(as_bytes.get("oid").unwrap(), &object_id.bytes()); + } + + #[test] + fn wrong_binary_type_for_uuid() { + let mut docbytes = Vec::new(); + let doc = &doc! { + "_id": ObjectId::with_string("abcdefabcdefabcdefabcdef").unwrap(), + "first_name": "Edward", + "last_name": "Teach", + "has cookies": true, + "number": Binary { subtype: BinarySubtype::BinaryOld, bytes: vec![7, 0, 0, 0, 8, 6, 7, 5, 3, 0, 9] }, + "gid": Binary { subtype: BinarySubtype::Function, bytes: b"12345678901234567890123456789012".to_vec() }, + }; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + + from_bytes::(&docbytes).expect_err("Should have failed to decode gid field"); + } + + #[test] + fn deserialize_map() { + let mut docbytes = Vec::new(); + let doc = doc! 
{ + "this": "that", + "three": "four", + "keymaster": "gatekeeper", + }; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + + let map: HashMap<&str, &str> = + from_bytes(&docbytes).expect("could not decode into HashMap<&str, &str>"); + assert_eq!(map.len(), 3); + assert_eq!(*map.get("this").expect("key not found"), "that"); + assert_eq!(*map.get("three").expect("key not found"), "four"); + assert_eq!(*map.get("keymaster").expect("key not found"), "gatekeeper"); + + let map: HashMap = + from_bytes(&docbytes).expect("could not decode into HashMap"); + assert_eq!(map.len(), 3); + assert_eq!(map.get("this").expect("key not found"), "that"); + assert_eq!(map.get("three").expect("key not found"), "four"); + assert_eq!(map.get("keymaster").expect("key not found"), "gatekeeper"); + } + + #[test] + fn deserialize_seq() { + let mut docbytes = Vec::new(); + let doc = doc! {"array": [1i32, 2i64, 3i32, "abc"]}; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + let map: HashMap> = + from_bytes(&docbytes).expect("could not decode into HashMap"); + assert_eq!(map.len(), 1); + let arr = map.get("array").expect("key not found"); + assert_eq!(arr.get(0).expect("no index 0"), &Bson::Int32(1)); + assert_eq!(arr.get(1).expect("no index 1"), &Bson::Int64(2)); + assert_eq!(arr.get(2).expect("no index 2"), &Bson::Int32(3)); + assert_eq!(arr.get(3).expect("no index 3"), &Bson::String("abc".into())); + assert!(arr.get(4).is_none()); + } + + #[test] + fn deserialize_js_with_scope() { + let mut docbytes = Vec::new(); + let doc = doc! 
{"js_with_scope": JavaScriptCodeWithScope { + code: String::from("console.log(value);"), + scope: doc!{"value": "Hello world"}, + }}; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + + let rawdoc = Doc::new(&docbytes).expect("Invalid document"); + assert!(rawdoc.get_javascript_with_scope("js_with_scope").is_ok()); + let map: HashMap<&str, (&str, HashMap<&str, &str>)> = + from_doc(rawdoc).expect("could not decode js with scope"); + assert_eq!( + map.get("js_with_scope").expect("no key js_with_scope").0, + "console.log(value);" + ); + assert_eq!( + map.get("js_with_scope") + .expect("no key js_with_scope") + .1 + .get("value") + .expect("no key value"), + &"Hello world", + ); + } + + #[test] + fn deserialize_regexp() { + let mut docbytes = Vec::new(); + let doc = doc! {"regex": crate::Regex { pattern: String::from("^_id$"), options: String::from("i") } }; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + let rawdoc = Doc::new(&docbytes).expect("Invalid document"); + assert!(rawdoc.get_regex("regex").is_ok()); + let map: HashMap<&str, (&str, &str)> = from_doc(rawdoc).expect("could not decode regex"); + assert_eq!(map.get("regex").expect("no key regex").0, "^_id$"); + assert_eq!(map.get("regex").expect("no key regex").1, "i"); + } + + #[test] + fn deserialize_utc_datetime_to_struct() { + #[derive(Deserialize)] + struct Dateish { + #[serde(with = "chrono::serde::ts_milliseconds")] + utc_datetime: chrono::DateTime, + } + let mut docbytes = Vec::new(); + let doc = doc! 
{"utc_datetime": Bson::DateTime(Utc::now())}; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + let rawdoc = DocBuf::new(docbytes).expect("invalid document"); + assert!(rawdoc.get_datetime("utc_datetime").is_ok()); + let value: Dateish = from_doc(&rawdoc).expect("could not decode utc_datetime"); + let elapsed = Utc::now().signed_duration_since(value.utc_datetime); + // The previous now was less than half a second ago + assert!(elapsed.num_milliseconds() >= 0); + assert!(elapsed.num_milliseconds() < 500); + } + + #[test] + fn deserialize_utc_datetime_as_chrono_datetime() { + let mut docbytes = Vec::new(); + let doc = doc! {"utc_datetime": Utc::now()}; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + let rawdoc = DocBuf::new(docbytes).expect("invalid document"); + assert!(rawdoc.get_datetime("utc_datetime").is_ok()); + let map: HashMap<&str, DateTime> = + from_doc(&rawdoc).expect("could not decode utc_datetime"); + + let dt = map.get("utc_datetime").expect("no key utc_datetime"); + println!("{:?}", dt); + let dt = dt.0; + let elapsed = Utc::now().signed_duration_since(dt); + // The previous now was less than half a second ago + assert!(elapsed.num_milliseconds() >= 0); + assert!(elapsed.num_milliseconds() < 500); + } + + #[test] + fn deserialize_object_id_as_bson() { + let mut docbytes = Vec::new(); + let doc = doc! { "object_id": ObjectId::with_string("123456123456123456123456").unwrap() }; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + let rawdoc = DocBuf::new(docbytes).expect("invalid document"); + assert!(rawdoc.get_object_id("object_id").is_ok()); + let map: HashMap<&str, Bson> = from_doc(&rawdoc).expect("could not decode object_id"); + assert_eq!( + map.get("object_id").unwrap(), + &Bson::ObjectId(ObjectId::with_string("123456123456123456123456").unwrap()) + ); + } + + #[test] + fn deserialize_utc_datetime_as_bson() { + let mut docbytes = Vec::new(); + let doc = doc! 
{"utc_datetime": Utc::now()}; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + let rawdoc = DocBuf::new(docbytes).expect("invalid document"); + assert!(rawdoc.get_datetime("utc_datetime").is_ok()); + let map: HashMap<&str, Bson> = from_doc(&rawdoc).expect("could not decode utc_datetime"); + + let dt = map.get("utc_datetime").expect("no key utc_datetime"); + let dt = dt + .as_datetime() + .expect("value was not of type Bson::DateTime"); + let elapsed = Utc::now().signed_duration_since(*dt); + // The previous now was less than half a second ago + assert!(elapsed.num_milliseconds() >= 0); + assert!(elapsed.num_milliseconds() < 500); + } + + #[test] + fn deserialize_utc_datetime_as_i64() { + let mut docbytes = Vec::new(); + let doc = doc! {"utc_datetime": Bson::DateTime(Utc::now())}; + doc.to_writer(&mut docbytes) + .expect("could not encode document"); + let rawdoc = DocBuf::new(docbytes).expect("invalid document"); + assert!(rawdoc.get_datetime("utc_datetime").is_ok()); + let map: HashMap<&str, i64> = + from_doc(&rawdoc).expect("could not decode utc_datetime as i64"); + let _time = map.get("utc_datetime").expect("no key utc_datetime"); + } +} diff --git a/src/raw/de/binary.rs b/src/raw/de/binary.rs new file mode 100644 index 00000000..29a1e6e9 --- /dev/null +++ b/src/raw/de/binary.rs @@ -0,0 +1,201 @@ +use serde::de::{DeserializeSeed, Deserializer, MapAccess, Visitor}; +use serde::forward_to_deserialize_any; + +use super::Error; +use crate::raw::elem::RawBsonBinary; +use crate::spec::BinarySubtype; + +pub static SUBTYPE_FIELD: &str = "$__bson_binary_subtype"; +pub static DATA_FIELD: &str = "$__bson_binary_data"; +pub static NAME: &str = "$__bson_Binary"; + +pub(super) struct BinaryDeserializer<'de> { + binary: RawBsonBinary<'de>, + visiting: Visiting, +} + +impl<'de> BinaryDeserializer<'de> { + pub(super) fn new(binary: RawBsonBinary<'de>) -> BinaryDeserializer<'de> { + BinaryDeserializer { + binary, + visiting: Visiting::New, + } + } +} + 
+impl<'de> Deserializer<'de> for BinaryDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_bytes(visitor) + } + + fn deserialize_bytes(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_borrowed_bytes(self.binary.as_bytes()) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_bytes(self.binary.as_bytes()) + } + + fn deserialize_map>(self, visitor: V) -> Result { + visitor.visit_map(self) + } + + fn deserialize_struct>( + self, + name: &str, + _fields: &[&str], + visitor: V, + ) -> Result { + if name == NAME { + visitor.visit_map(self) + } else { + Err(Error::MalformedDocument) + } + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} + +enum Visiting { + New, + Subtype, + Data, + Done, +} + +impl<'de> MapAccess<'de> for BinaryDeserializer<'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result, Error> + where + K: DeserializeSeed<'de>, + { + match self.visiting { + Visiting::New => { + self.visiting = Visiting::Subtype; + seed.deserialize(BinaryKeyDeserializer::new(SUBTYPE_FIELD)) + .map(Some) + } + Visiting::Subtype => { + self.visiting = Visiting::Data; + seed.deserialize(BinaryKeyDeserializer::new(DATA_FIELD)) + .map(Some) + } + Visiting::Data => { + self.visiting = Visiting::Done; + Ok(None) + } + _ => Err(Error::MalformedDocument), + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + match self.visiting { + Visiting::Subtype => { + seed.deserialize(BinarySubtypeDeserializer::new(self.binary.subtype())) + } + Visiting::Data => seed.deserialize(BinaryDataDeserializer::new(self.binary)), + _ => Err(Error::MalformedDocument), + } + } +} + +struct BinaryKeyDeserializer { + key: 
&'static str, +} + +impl BinaryKeyDeserializer { + fn new(key: &'static str) -> BinaryKeyDeserializer { + BinaryKeyDeserializer { key } + } +} + +impl<'de> Deserializer<'de> for BinaryKeyDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_str(self.key) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} + +struct BinarySubtypeDeserializer { + subtype: BinarySubtype, +} + +impl BinarySubtypeDeserializer { + fn new(subtype: BinarySubtype) -> BinarySubtypeDeserializer { + BinarySubtypeDeserializer { subtype } + } +} + +impl<'de> Deserializer<'de> for BinarySubtypeDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let subtype: u8 = self.subtype.into(); + visitor.visit_i32(subtype as i32) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} + +struct BinaryDataDeserializer<'de> { + binary: RawBsonBinary<'de>, +} + +impl<'de> BinaryDataDeserializer<'de> { + fn new(binary: RawBsonBinary<'de>) -> BinaryDataDeserializer<'de> { + BinaryDataDeserializer { binary } + } +} + +impl<'de> Deserializer<'de> for BinaryDataDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_borrowed_bytes(self.binary.as_bytes()) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} diff --git a/src/raw/de/datetime.rs b/src/raw/de/datetime.rs new file mode 
100644 index 00000000..fd170330 --- /dev/null +++ b/src/raw/de/datetime.rs @@ -0,0 +1,162 @@ +use std::convert::TryInto; + +use serde::de::{DeserializeSeed, Deserializer, MapAccess, Visitor}; +use serde::forward_to_deserialize_any; + +use super::Error; + +pub static NAME: &str = "$__bson_DateTime"; +pub static FIELD: &str = "$date"; +pub static FIELDS: &[&str] = &[FIELD]; + +struct DateTimeKeyDeserializer { + key: &'static str, +} + +impl DateTimeKeyDeserializer { + fn new(key: &'static str) -> DateTimeKeyDeserializer { + DateTimeKeyDeserializer { key } + } +} + +impl<'de> Deserializer<'de> for DateTimeKeyDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_str(self.key) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} + +pub struct DateTimeDeserializer { + data: i64, + visited: bool, +} + +impl DateTimeDeserializer { + pub fn new(data: i64) -> DateTimeDeserializer { + DateTimeDeserializer { + data, + visited: false, + } + } +} + +impl<'de> Deserializer<'de> for DateTimeDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_struct(NAME, FIELDS, visitor) + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i64(self.data) + } + + fn deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u64(self.data.try_into()?) 
+ } + + fn deserialize_map>(self, visitor: V) -> Result { + visitor.visit_map(self) + } + + fn deserialize_struct>( + self, + name: &str, + _fields: &[&str], + visitor: V, + ) -> Result { + if name == NAME { + visitor.visit_map(self) + } else { + Err(Error::MalformedDocument) + } + } + + forward_to_deserialize_any!( + bool u8 u16 u32 i8 i16 i32 f32 f64 char bytes byte_buf + option unit newtype_struct str string tuple + ignored_any seq unit_struct tuple_struct enum identifier + ); +} + +impl<'de> MapAccess<'de> for DateTimeDeserializer { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result, Error> + where + K: DeserializeSeed<'de>, + { + match self.visited { + false => seed + .deserialize(DateTimeKeyDeserializer::new(FIELD)) + .map(Some), + true => Ok(None), + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + match self.visited { + false => { + self.visited = true; + seed.deserialize(DateTimeFieldDeserializer::new(self.data)) + } + true => Err(Error::MalformedDocument), + } + } +} + +struct DateTimeFieldDeserializer { + data: i64, +} + +impl<'de> DateTimeFieldDeserializer { + fn new(data: i64) -> DateTimeFieldDeserializer { + DateTimeFieldDeserializer { data } + } +} + +impl<'de> Deserializer<'de> for DateTimeFieldDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i64(self.data) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 f32 f64 char seq + bytes byte_buf str string map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} diff --git a/src/raw/de/js.rs b/src/raw/de/js.rs new file mode 100644 index 00000000..4f2caf2d --- /dev/null +++ b/src/raw/de/js.rs @@ -0,0 +1,229 @@ +use serde::de::{DeserializeSeed, Deserializer, 
MapAccess, SeqAccess, Visitor}; +use serde::forward_to_deserialize_any; + +use crate::raw::Doc; +use super::{BsonDeserializer, Error}; + +pub static NAME: &str = "$__bson_JavaScript"; +pub static WITH_SCOPE_NAME: &str = "$__bson_JavaScriptWithScope"; +pub static DATA_FIELD: &str = "$__bson_javascript_data"; +pub static SCOPE_FIELD: &str = "$__bson_javascript_scope"; +pub static FIELDS: &[&str] = &[DATA_FIELD]; +pub static WITH_SCOPE_FIELDS: &[&str] = &[DATA_FIELD, SCOPE_FIELD]; + +struct JavaScriptKeyDeserializer { + key: &'static str, +} + +impl JavaScriptKeyDeserializer { + fn new(key: &'static str) -> JavaScriptKeyDeserializer { + JavaScriptKeyDeserializer { key } + } +} + +impl<'de> Deserializer<'de> for JavaScriptKeyDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_str(self.key) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} + +pub(super) struct JavaScriptWithScopeDeserializer<'de> { + js: &'de str, + scope: &'de Doc, + visiting: ScopedVisiting, +} + +impl<'de> JavaScriptWithScopeDeserializer<'de> { + pub(super) fn new + ?Sized>( + data: (&'de str, &'de D), + ) -> JavaScriptWithScopeDeserializer<'de> { + JavaScriptWithScopeDeserializer { + js: data.0, + scope: data.1.as_ref(), + visiting: ScopedVisiting::Js, + } + } +} + +impl<'de> Deserializer<'de> for JavaScriptWithScopeDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_borrowed_str(self.js) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_str(self.js) + } + + fn deserialize_seq(self, 
visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_seq(self) + } + + fn deserialize_tuple(self, ct: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + if ct != 2 { + Err(Error::MalformedDocument) + } else { + visitor.visit_seq(self) + } + } + + fn deserialize_map>(self, visitor: V) -> Result { + visitor.visit_map(self) + } + + fn deserialize_struct>( + self, + name: &str, + _fields: &[&str], + visitor: V, + ) -> Result { + if name == NAME { + visitor.visit_map(self) + } else { + Err(Error::MalformedDocument) + } + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char bytes byte_buf + option unit newtype_struct + ignored_any unit_struct tuple_struct enum identifier + ); +} + +enum ScopedVisiting { + Js, + Scope, + Done, +} + +impl<'de> SeqAccess<'de> for JavaScriptWithScopeDeserializer<'de> { + type Error = Error; + + fn next_element_seed(&mut self, seed: E) -> Result, Error> + where + E: DeserializeSeed<'de>, + { + match self.visiting { + ScopedVisiting::Js => { + self.visiting = ScopedVisiting::Scope; + seed.deserialize(JavaScriptWithScopeJsDeserializer::new(self.js)) + .map(Some) + } + ScopedVisiting::Scope => { + self.visiting = ScopedVisiting::Done; + seed.deserialize(&mut BsonDeserializer::from_doc(&self.scope)) + .map(Some) + } + ScopedVisiting::Done => Ok(None), + } + } +} + +impl<'de> MapAccess<'de> for JavaScriptWithScopeDeserializer<'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result, Error> + where + K: DeserializeSeed<'de>, + { + match self.visiting { + ScopedVisiting::Js => seed + .deserialize(JavaScriptKeyDeserializer::new(DATA_FIELD)) + .map(Some), + ScopedVisiting::Scope => seed + .deserialize(JavaScriptKeyDeserializer::new(SCOPE_FIELD)) + .map(Some), + ScopedVisiting::Done => Ok(None), + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + match self.visiting { + ScopedVisiting::Js => { + self.visiting = 
ScopedVisiting::Scope; + seed.deserialize(JavaScriptWithScopeJsDeserializer::new(self.js)) + } + ScopedVisiting::Scope => { + self.visiting = ScopedVisiting::Done; + seed.deserialize(&mut BsonDeserializer::from_doc(self.scope)) + } + ScopedVisiting::Done => Err(Error::MalformedDocument), + } + } +} + +struct JavaScriptWithScopeJsDeserializer<'de> { + data: &'de str, +} + +impl<'de> JavaScriptWithScopeJsDeserializer<'de> { + fn new(data: &'de str) -> JavaScriptWithScopeJsDeserializer<'de> { + JavaScriptWithScopeJsDeserializer { data } + } +} + +impl<'de> Deserializer<'de> for JavaScriptWithScopeJsDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_borrowed_str(self.data) + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_borrowed_str(self.data) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_str(self.data) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} diff --git a/src/raw/de/object_id.rs b/src/raw/de/object_id.rs new file mode 100644 index 00000000..03e10ed2 --- /dev/null +++ b/src/raw/de/object_id.rs @@ -0,0 +1,146 @@ +// ObjectId handling + +use serde::de::{DeserializeSeed, Deserializer, MapAccess, Visitor}; +use serde::forward_to_deserialize_any; + +use super::Error; +use crate::raw::elem::Element; +use crate::spec::ElementType; + +pub static FIELD: &str = "$oid"; +pub static FIELDS: &[&str] = &[FIELD]; +pub static NAME: &str = "$__bson_ObjectId"; + +pub struct RawObjectIdDeserializer<'de> { + bson: Element<'de>, + visited: bool, +} + +impl<'de> RawObjectIdDeserializer<'de> { + pub fn new(bson: Element<'de>) -> RawObjectIdDeserializer<'de> { + RawObjectIdDeserializer { + bson, + visited: false, + } + } +} 
+ +impl<'de> Deserializer<'de> for RawObjectIdDeserializer<'de> { + type Error = Error; + + fn deserialize_any>(self, visitor: V) -> Result { + match self.bson.element_type() { + ElementType::ObjectId => self.deserialize_struct(NAME, FIELDS, visitor), + _ => Err(Error::MalformedDocument), + } + } + + fn deserialize_bytes>(self, visitor: V) -> Result { + match self.bson.element_type() { + ElementType::ObjectId => visitor.visit_bytes(self.bson.as_bytes()), + _ => Err(Error::MalformedDocument), + } + } + + fn deserialize_map>(self, visitor: V) -> Result { + visitor.visit_map(self) + } + + fn deserialize_struct>( + self, + name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result { + if name == NAME && fields == FIELDS { + visitor.visit_map(self) + } else { + Err(Error::MalformedDocument) + } + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + byte_buf option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} + +impl<'de> MapAccess<'de> for RawObjectIdDeserializer<'de> { + type Error = Error; + + fn next_key_seed( + &mut self, + seed: K, + ) -> Result>::Value>, Self::Error> + where + K: DeserializeSeed<'de>, + { + if self.visited { + Ok(None) + } else { + self.visited = true; + seed.deserialize(ObjectIdKeyDeserializer).map(Some) + } + } + + fn next_value_seed( + &mut self, + seed: V, + ) -> Result<>::Value, Self::Error> + where + V: DeserializeSeed<'de>, + { + seed.deserialize(ObjectIdValueDeserializer::new(self.bson)) + } +} + +pub(crate) struct ObjectIdKeyDeserializer; + +impl<'de> Deserializer<'de> for ObjectIdKeyDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_borrowed_str(FIELD) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct 
tuple_struct tuple enum identifier + ); +} + +struct ObjectIdValueDeserializer<'de>(Element<'de>); + +impl<'de> ObjectIdValueDeserializer<'de> { + fn new(bson: Element<'de>) -> ObjectIdValueDeserializer<'de> { + ObjectIdValueDeserializer(bson) + } +} + +impl<'de> Deserializer<'de> for ObjectIdValueDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self.0.element_type() { + ElementType::ObjectId => { + let hex = self.0.as_object_id()?.to_hex(); + visitor.visit_string(hex) + } + _ => Err(Error::MalformedDocument), + } + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} diff --git a/src/raw/de/regex.rs b/src/raw/de/regex.rs new file mode 100644 index 00000000..5ae74258 --- /dev/null +++ b/src/raw/de/regex.rs @@ -0,0 +1,209 @@ +use serde::de::{DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}; +use serde::forward_to_deserialize_any; + +use super::Error; +use crate::raw::elem::RawBsonRegex; + +pub static NAME: &str = "$__bson_Regex"; +pub static REGEXP_FIELD: &str = "$__bson_regexp_regexp"; +pub static OPTIONS_FIELD: &str = "$__bson_regexp_options"; +pub static FIELDS: &[&str] = &[REGEXP_FIELD, OPTIONS_FIELD]; + +struct RegexKeyDeserializer { + key: &'static str, +} + +impl RegexKeyDeserializer { + fn new(key: &'static str) -> RegexKeyDeserializer { + RegexKeyDeserializer { key } + } +} + +impl<'de> Deserializer<'de> for RegexKeyDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_str(self.key) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} + 
+pub(super) struct RegexDeserializer<'de> { + data: RawBsonRegex<'de>, + visiting: Visiting, +} + +impl<'de> RegexDeserializer<'de> { + pub(super) fn new(data: RawBsonRegex<'de>) -> RegexDeserializer<'de> { + RegexDeserializer { + data, + visiting: Visiting::Regex, + } + } +} + +impl<'de> Deserializer<'de> for RegexDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_seq(self) + } + + fn deserialize_tuple(self, ct: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + if ct == 2 { + visitor.visit_seq(self) + } else { + Err(Error::MalformedDocument) + } + } + + fn deserialize_map>(self, visitor: V) -> Result { + visitor.visit_map(self) + } + + fn deserialize_struct>( + self, + name: &str, + _fields: &[&str], + visitor: V, + ) -> Result { + if name == NAME { + visitor.visit_map(self) + } else { + Err(Error::MalformedDocument) + } + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char bytes byte_buf + option unit newtype_struct str string + ignored_any unit_struct tuple_struct enum identifier + ); +} + +enum Visiting { + Regex, + Options, + Done, +} + +impl<'de> SeqAccess<'de> for RegexDeserializer<'de> { + type Error = Error; + + fn next_element_seed(&mut self, seed: E) -> Result, Error> + where + E: DeserializeSeed<'de>, + { + match self.visiting { + Visiting::Regex => { + self.visiting = Visiting::Options; + seed.deserialize(RegexFieldDeserializer::new(self.data.pattern())) + .map(Some) + } + Visiting::Options => { + self.visiting = Visiting::Done; + seed.deserialize(RegexFieldDeserializer::new(self.data.options())) + .map(Some) + } + Visiting::Done => Ok(None), + } + } +} + +impl<'de> MapAccess<'de> for RegexDeserializer<'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result, Error> + where + K: 
DeserializeSeed<'de>, + { + match self.visiting { + Visiting::Regex => seed + .deserialize(RegexKeyDeserializer::new(REGEXP_FIELD)) + .map(Some), + Visiting::Options => seed + .deserialize(RegexKeyDeserializer::new(OPTIONS_FIELD)) + .map(Some), + Visiting::Done => Ok(None), + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + match self.visiting { + Visiting::Regex => { + self.visiting = Visiting::Options; + seed.deserialize(RegexFieldDeserializer::new(self.data.pattern())) + } + Visiting::Options => { + self.visiting = Visiting::Done; + seed.deserialize(RegexFieldDeserializer::new(self.data.options())) + } + Visiting::Done => Err(Error::MalformedDocument), + } + } +} + +struct RegexFieldDeserializer<'de> { + data: &'de str, +} + +impl<'de> RegexFieldDeserializer<'de> { + fn new(data: &'de str) -> RegexFieldDeserializer<'de> { + RegexFieldDeserializer { data } + } +} + +impl<'de> Deserializer<'de> for RegexFieldDeserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_borrowed_str(self.data) + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_borrowed_str(self.data) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_str(self.data) + } + + forward_to_deserialize_any!( + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char seq + bytes byte_buf map struct option unit newtype_struct + ignored_any unit_struct tuple_struct tuple enum identifier + ); +} diff --git a/src/raw/elem.rs b/src/raw/elem.rs new file mode 100644 index 00000000..e38ebdee --- /dev/null +++ b/src/raw/elem.rs @@ -0,0 +1,382 @@ +use std::{ + convert::{TryFrom, TryInto}, + time::Duration, +}; + +use crate::oid; +pub use crate::spec::{BinarySubtype, ElementType}; +use chrono::{DateTime, TimeZone, Utc}; + +#[cfg(feature = "decimal128")] +use super::d128_from_slice; +use super::{ 
+ i32_from_slice, i64_from_slice, read_lenencoded, read_nullterminated, u32_from_slice, Array, + Doc, RawError, RawResult, +}; + +#[derive(Clone, Copy, Debug)] +pub struct Element<'a> { + element_type: ElementType, + data: &'a [u8], +} + +impl<'a> Element<'a> { + // This is not public. An Element object can only be created by iterating over a bson document method + // on RawBsonDoc + pub(super) fn new(element_type: ElementType, data: &'a [u8]) -> Element<'a> { + Element { element_type, data } + } + + pub fn element_type(self) -> ElementType { + self.element_type + } + + pub fn as_bytes(self) -> &'a [u8] { + self.data + } + + pub fn as_f64(self) -> RawResult { + if let ElementType::Double = self.element_type { + Ok(f64::from_bits(u64::from_le_bytes( + self.data + .try_into() + .map_err(|_| RawError::MalformedValue("f64 should be 8 bytes long".into()))?, + ))) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_str(self) -> RawResult<&'a str> { + if let ElementType::String = self.element_type { + read_lenencoded(self.data) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_document(self) -> RawResult<&'a Doc> { + if let ElementType::EmbeddedDocument = self.element_type { + Doc::new(self.data) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_array(self) -> RawResult<&'a Array> { + if let ElementType::Array = self.element_type { + Array::new(self.data) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_binary(self) -> RawResult> { + if let ElementType::Binary = self.element_type { + let length = i32_from_slice(&self.data[0..4]); + let subtype = BinarySubtype::from(self.data[4]); // TODO: This mishandles reserved values + if self.data.len() as i32 != length + 5 { + return Err(RawError::MalformedValue( + "binary bson has wrong declared length".into(), + )); + } + let data = match subtype { + BinarySubtype::BinaryOld => { + if length < 4 { + return Err(RawError::MalformedValue( + "old binary subtype has no inner 
declared length".into(), + )); + } + let oldlength = i32_from_slice(&self.data[5..9]); + if oldlength + 4 != length { + return Err(RawError::MalformedValue( + "old binary subtype has wrong inner declared length".into(), + )); + } + &self.data[9..] + } + _ => &self.data[5..], + }; + Ok(RawBsonBinary::new(subtype, data)) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_object_id(self) -> RawResult { + if let ElementType::ObjectId = self.element_type { + Ok(oid::ObjectId::with_bytes(self.data.try_into().map_err( + |_| RawError::MalformedValue("object id should be 12 bytes long".into()), + )?)) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_bool(self) -> RawResult { + if let ElementType::Boolean = self.element_type { + if self.data.len() != 1 { + Err(RawError::MalformedValue("boolean has length != 1".into())) + } else { + match self.data[0] { + 0 => Ok(false), + 1 => Ok(true), + _ => Err(RawError::MalformedValue( + "boolean value was not 0 or 1".into(), + )), + } + } + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_datetime(self) -> RawResult> { + if let ElementType::DateTime = self.element_type { + let millis = i64_from_slice(self.data); + if millis >= 0 { + let duration = Duration::from_millis(millis as u64); + Ok(Utc.timestamp( + duration.as_secs().try_into().unwrap(), + duration.subsec_nanos(), + )) + } else { + let duration = Duration::from_millis((-millis).try_into().unwrap()); + let mut secs: i64 = duration.as_secs().try_into().unwrap(); + secs *= -1; + let mut nanos = duration.subsec_nanos(); + if nanos > 0 { + secs -= 1; + nanos = 1_000_000_000 - nanos; + } + Ok(Utc.timestamp(secs, nanos)) + } + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_null(self) -> RawResult<()> { + if let ElementType::Null = self.element_type { + Ok(()) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_regex(self) -> RawResult> { + if let ElementType::RegularExpression = self.element_type { + 
RawBsonRegex::new(self.data) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_javascript(self) -> RawResult<&'a str> { + if let ElementType::JavaScriptCode = self.element_type { + read_lenencoded(self.data) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_symbol(self) -> RawResult<&'a str> { + if let ElementType::Symbol = self.element_type { + read_lenencoded(self.data) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_javascript_with_scope(self) -> RawResult<(&'a str, &'a Doc)> { + if let ElementType::JavaScriptCodeWithScope = self.element_type { + let length = i32_from_slice(&self.data[..4]); + assert_eq!(self.data.len() as i32, length); + + let js = read_lenencoded(&self.data[4..])?; + let doc = Doc::new(&self.data[9 + js.len()..])?; + + Ok((js, doc)) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_i32(self) -> RawResult { + if let ElementType::Int32 = self.element_type { + assert_eq!(self.data.len(), 4); + Ok(i32_from_slice(self.data)) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_timestamp(self) -> RawResult> { + if let ElementType::Timestamp = self.element_type { + assert_eq!(self.data.len(), 8); + Ok(RawBsonTimestamp { data: self.data }) + } else { + Err(RawError::UnexpectedType) + } + } + + pub fn as_i64(self) -> RawResult { + if let ElementType::Int64 = self.element_type { + assert_eq!(self.data.len(), 8); + Ok(i64_from_slice(self.data)) + } else { + Err(RawError::UnexpectedType) + } + } + + #[cfg(feature = "decimal128")] + pub fn as_decimal128(self) -> RawResult { + if let ElementType::Decimal128 = self.element_type { + assert_eq!(self.data.len(), 16); + Ok(d128_from_slice(self.data)) + } else { + Err(RawError::UnexpectedType) + } + } +} + +impl<'a> TryFrom> for crate::Bson { + type Error = RawError; + + fn try_from(rawbson: Element<'a>) -> RawResult { + Ok(match rawbson.element_type { + ElementType::Double => crate::Bson::Double(rawbson.as_f64()?), + ElementType::String => 
crate::Bson::String(String::from(rawbson.as_str()?)), + ElementType::EmbeddedDocument => { + let rawdoc = rawbson.as_document()?; + let doc = rawdoc.try_into()?; + crate::Bson::Document(doc) + } + ElementType::Array => { + let rawarray = rawbson.as_array()?; + let v = rawarray.try_into()?; + crate::Bson::Array(v) + } + ElementType::Binary => { + let RawBsonBinary { subtype, data } = rawbson.as_binary()?; + crate::Bson::Binary(crate::Binary { + subtype, + bytes: data.to_vec(), + }) + } + ElementType::ObjectId => crate::Bson::ObjectId(rawbson.as_object_id()?), + ElementType::Boolean => crate::Bson::Boolean(rawbson.as_bool()?), + ElementType::DateTime => crate::Bson::DateTime(rawbson.as_datetime()?), + ElementType::Null => crate::Bson::Null, + ElementType::RegularExpression => { + let rawregex = rawbson.as_regex()?; + crate::Bson::RegularExpression(crate::Regex { + pattern: String::from(rawregex.pattern()), + options: String::from(rawregex.options()), + }) + } + ElementType::JavaScriptCode => { + crate::Bson::JavaScriptCode(String::from(rawbson.as_javascript()?)) + } + ElementType::Int32 => crate::Bson::Int32(rawbson.as_i32()?), + ElementType::Timestamp => { + // RawBson::as_timestamp() returns u64, but crate::Bson::Timestamp expects i64 + let ts = rawbson.as_timestamp()?; + crate::Bson::Timestamp(crate::Timestamp { + time: ts.time(), + increment: ts.increment(), + }) + } + ElementType::Int64 => crate::Bson::Int64(rawbson.as_i64()?), + ElementType::Undefined => crate::Bson::Null, + ElementType::DbPointer => panic!("Uh oh. 
Maybe this should be a TryFrom"), + ElementType::Symbol => crate::Bson::Symbol(String::from(rawbson.as_symbol()?)), + ElementType::JavaScriptCodeWithScope => { + let (js, scope) = rawbson.as_javascript_with_scope()?; + crate::Bson::JavaScriptCodeWithScope(crate::JavaScriptCodeWithScope { + code: String::from(js), + scope: scope.try_into()?, + }) + } + #[cfg(feature = "decimal128")] + ElementType::Decimal128 => crate::Bson::Decimal128(rawbson.as_decimal128()?), + + #[cfg(not(feature = "decimal128"))] + ElementType::Decimal128 => return Err(RawError::UnexpectedType), + ElementType::MaxKey => unimplemented!(), + ElementType::MinKey => unimplemented!(), + }) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct RawBsonBinary<'a> { + pub(super) subtype: BinarySubtype, + pub(super) data: &'a [u8], +} + +impl<'a> RawBsonBinary<'a> { + pub fn new(subtype: BinarySubtype, data: &'a [u8]) -> RawBsonBinary<'a> { + RawBsonBinary { subtype, data } + } + + /// Return the BinarySubtype. + pub fn subtype(self) -> BinarySubtype { + self.subtype + } + + /// Return the binary data as raw bytes. + pub fn as_bytes(self) -> &'a [u8] { + self.data + } +} + +#[derive(Clone, Copy, Debug)] +pub struct RawBsonRegex<'a> { + pub(super) pattern: &'a str, + pub(super) options: &'a str, +} + +impl<'a> RawBsonRegex<'a> { + pub fn new(data: &'a [u8]) -> RawResult> { + let pattern = read_nullterminated(data)?; + let opts = read_nullterminated(&data[pattern.len() + 1..])?; + if pattern.len() + opts.len() == data.len() - 2 { + Ok(RawBsonRegex { + pattern, + options: opts, + }) + } else { + Err(RawError::MalformedValue( + "expected two null-terminated strings".into(), + )) + } + } + + pub fn pattern(self) -> &'a str { + self.pattern + } + + pub fn options(self) -> &'a str { + self.options + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct RawBsonTimestamp<'a> { + data: &'a [u8], +} + +impl<'a> RawBsonTimestamp<'a> { + /// Return the time portion of the timestamp. 
+ pub fn time(&self) -> u32 { + // RawBsonTimestamp can only be constructed with the correct data length, so this should always succeed. + u32_from_slice(&self.data[4..8]) + } + + /// Return the increment portion of the timestamp. + pub fn increment(&self) -> u32 { + // RawBsonTimestamp can only be constructed with the correct data length, so this should always succeed. + u32_from_slice(&self.data[0..4]) + } +} diff --git a/src/raw/mod.rs b/src/raw/mod.rs new file mode 100644 index 00000000..8f6a1e37 --- /dev/null +++ b/src/raw/mod.rs @@ -0,0 +1,1849 @@ +/*! +A rawbson document can be created from a `Vec` containing raw BSON data, and elements +accessed via methods similar to those in the [bson-rust](https://crates.io/crate/bson-rust) +crate. Note that rawbson returns a Result>, since the bytes contained in the +document are not fully validated until trying to access the contained data. + +```rust +use bson::raw::{ + DocBuf, + elem, +}; + +// \x13\x00\x00\x00 // total document size +// \x02 // 0x02 = type String +// hi\x00 // field name +// \x06\x00\x00\x00y'all\x00 // field value +// \x00 // document terminating NUL + +let doc = DocBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +let elem: Option = doc.get("hi")?; +assert_eq!( + elem.unwrap().as_str()?, + "y'all", +); +# Ok::<(), bson::raw::RawError>(()) +``` + +### bson-rust interop + +This crate is designed to interoperate smoothly with the bson crate. + +A [`DocBuf`] can be created from a [`bson::document::Document`]. Internally, this +serializes the `Document` to a `Vec`, and then includes those bytes in the [`DocBuf`]. + +```rust +use bson::doc; +use bson::raw::{ + DocBuf, +}; + +let document = doc!{"goodbye": {"cruel": "world"}}; +let raw = DocBuf::from_document(&document); +let value: Option<&str> = raw.get_document("goodbye")? + .map(|doc| doc.get_str("cruel")) + .transpose()? 
+ .flatten(); + +assert_eq!( + value, + Some("world"), +); +# Ok::<(), bson::raw::RawError>(()) +``` + +### Reference types + +A BSON document can also be accessed with the [`Doc`] reference type, +which is an unsized type that represents the BSON payload as a `[u8]`. +This allows accessing nested documents without reallocation. [Doc] +must always be accessed via a pointer type, similarly to `[T]` and `str`. + +This type will coexist with the now deprecated [DocRef] type for at +least one minor release. + +The below example constructs a bson document in a stack-based array, +and extracts a &str from it, performing no heap allocation. + +```rust +use bson::raw::Doc; + +let bytes = b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00"; +assert_eq!(Doc::new(bytes)?.get_str("hi")?, Some("y'all")); +# Ok::<(), bson::raw::RawError>(()) +``` + +### Iteration + +[`Doc`] implements [`IntoIterator`](std::iter::IntoIterator), which can also +be accessed via [`DocBuf::iter`]. + +```rust +use bson::doc; +use bson::raw::{DocBuf, elem::Element}; + +let doc = DocBuf::from_document(&doc! {"crate": "rawbson", "license": "MIT"}); +let mut dociter = doc.iter(); + +let (key, value): (&str, Element) = dociter.next().unwrap()?; +assert_eq!(key, "crate"); +assert_eq!(value.as_str()?, "rawbson"); + +let (key, value): (&str, Element) = dociter.next().unwrap()?; +assert_eq!(key, "license"); +assert_eq!(value.as_str()?, "MIT"); +# Ok::<(), bson::raw::RawError>(()) +``` + +### serde support + +There is also serde deserialization support. + +Serde serialization support is not yet provided. For now, use +[`bson::to_document`] instead, and then serialize it out using +[`bson::Document::to_writer`] or [`DocBuf::from_document`]. 
/// Error to indicate that either a value was empty or it contained an unexpected
/// type, for use with the direct getters.
#[derive(Debug, PartialEq)]
pub enum RawError {
    /// Found a Bson value with the specified key, but not with the expected type
    UnexpectedType,

    /// The found value was not well-formed.  The payload is a human-readable
    /// description of what was wrong.
    MalformedValue(String),

    /// Found a value where a utf-8 string was expected, but it was not valid
    /// utf-8.  The payload carries the offending data as raw bytes (it cannot
    /// be a `String`, precisely because it failed utf-8 validation).
    Utf8EncodingError(Vec<u8>),
}

impl std::fmt::Display for RawError {
    /// Write a short description of the error.  The bytes carried by
    /// `Utf8EncodingError` are deliberately not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Self::UnexpectedType => f.write_str("unexpected type"),
            // `{:?}` keeps the detail quoted and escaped, as before.
            Self::MalformedValue(detail) => write!(f, "malformed value: {:?}", detail),
            Self::Utf8EncodingError(_) => f.write_str("utf-8 encoding error"),
        }
    }
}

impl std::error::Error for RawError {}
+/// +/// ``` +/// # use bson::raw::{DocBuf, RawError}; +/// let docbuf = DocBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// let mut iter = docbuf.iter(); +/// let (key, value) = iter.next().unwrap()?; +/// assert_eq!(key, "hi"); +/// assert_eq!(value.as_str(), Ok("y'all")); +/// assert!(iter.next().is_none()); +/// # Ok::<(), RawError>(()) +/// ``` +/// +/// Individual elements can be accessed using [`docbuf.get(&key)`](Doc::get), or any of +/// the `get_*` methods, like [`docbuf.get_object_id(&key)`](Doc::get_object_id), and +/// [`docbuf.get_str(&str)`](Doc::get_str). Accessing elements is an O(N) operation, +/// as it requires iterating through the document from the beginning to find the requested +/// key. +/// +/// ``` +/// # use bson::raw::{DocBuf, RawError}; +/// let docbuf = DocBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// assert_eq!(docbuf.get_str("hi")?, Some("y'all")); +/// # Ok::<(), RawError>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct DocBuf { + data: Box<[u8]>, +} + +impl DocBuf { + /// Create a new `DocBuf` from the provided `Vec`. + /// + /// The data is checked for a declared length equal to the length of the Vec, + /// and a trailing NUL byte. Other validation is deferred to access time. + /// + /// ``` + /// # use bson::raw::{DocBuf, RawError}; + /// let docbuf: DocBuf = DocBuf::new(b"\x05\0\0\0\0".to_vec())?; + /// # Ok::<(), RawError>(()) + /// ``` + pub fn new(data: Vec) -> RawResult { + if data.len() < 5 { + return Err(RawError::MalformedValue("document too short".into())); + } + let length = i32_from_slice(&data[..4]); + if data.len() as i32 != length { + return Err(RawError::MalformedValue("document length incorrect".into())); + } + if data[data.len() - 1] != 0 { + return Err(RawError::MalformedValue( + "document not null-terminated".into(), + )); + } + Ok(unsafe { DocBuf::new_unchecked(data) }) + } + + /// Create a DocBuf from a [bson::Document]. 
+ /// + /// ``` + /// # use bson::raw::{DocBuf, RawError}; + /// use bson::{doc, oid}; + /// let document = doc! { + /// "_id": oid::ObjectId::new(), + /// "name": "Herman Melville", + /// "title": "Moby-Dick", + /// }; + /// let docbuf: DocBuf = DocBuf::from_document(&document); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn from_document(doc: &crate::Document) -> DocBuf { + let mut data = Vec::new(); + doc.to_writer(&mut data).unwrap(); + unsafe { DocBuf::new_unchecked(data) } + } + + /// Create a DocBuf from an owned Vec without performing any checks on the provided data. + /// + /// ``` + /// # use bson::raw::{DocBuf, RawError}; + /// let docbuf: DocBuf = unsafe { + /// DocBuf::new_unchecked(b"\x05\0\0\0\0".to_vec()) + /// }; + /// # Ok::<(), RawError>(()) + /// ``` + /// + /// # Safety + /// + /// The provided bytes must have a valid length marker, and be NUL terminated. + pub unsafe fn new_unchecked(data: Vec) -> DocBuf { + DocBuf { + data: data.into_boxed_slice(), + } + } + + /// Return a [`&Doc`](Doc) borrowing from the data contained in self. + /// + /// # Deprecation + /// + /// DocRef is now a deprecated type alias for [Doc]. DocBuf can + /// dereference to &Doc directly, or be converted using [AsRef::as_ref], + /// so this function is unnecessary. + /// + /// ``` + /// # use bson::raw::{DocBuf, DocRef, RawError}; + /// let docbuf = DocBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; + /// let docref: DocRef = docbuf.as_docref(); + /// # Ok::<(), RawError>(()) + /// ``` + #[deprecated(since = "0.2.0", note = "use docbuf.as_ref() instead")] + pub fn as_docref(&self) -> &Doc { + self.as_ref() + } + + /// Return an iterator over the elements in the `DocBuf`, borrowing data. + /// + /// The associated item type is `Result<&str, Element<'_>>`. An error is + /// returned if data is malformed. + /// + /// ``` + /// # use bson::raw::{elem, DocBuf, RawError}; + /// use bson::doc; + /// let docbuf = DocBuf::from_document(&doc! 
{ "ferris": true }); + /// for element in docbuf.iter() { + /// let (key, value): (&str, elem::Element) = element?; + /// assert_eq!(key, "ferris"); + /// assert_eq!(value.as_bool()?, true); + /// } + /// # Ok::<(), RawError>(()) + /// ``` + /// + /// # Note: + /// + /// There is no owning iterator for DocBuf. If you need ownership over + /// elements that might need to allocate, you must explicitly convert + /// them to owned types yourself. + pub fn iter(&self) -> DocIter<'_> { + self.into_iter() + } + + /// Return the contained data as a `Vec` + /// + /// ``` + /// # use bson::raw::DocBuf; + /// use bson::doc; + /// let docbuf = DocBuf::from_document(&doc!{}); + /// assert_eq!(docbuf.into_inner(), b"\x05\x00\x00\x00\x00".to_vec()); + /// ``` + pub fn into_inner(self) -> Vec { + self.data.to_vec() + } +} + +impl TryFrom for crate::Document { + type Error = RawError; + + fn try_from(rawdoc: DocBuf) -> RawResult { + crate::Document::try_from(rawdoc.as_ref()) + } +} + +impl<'a> IntoIterator for &'a DocBuf { + type IntoIter = DocIter<'a>; + type Item = RawResult<(&'a str, elem::Element<'a>)>; + + fn into_iter(self) -> DocIter<'a> { + DocIter { + doc: &self, + offset: 4, + } + } +} + +impl AsRef for DocBuf { + fn as_ref(&self) -> &Doc { + // SAFETY: Constructing the DocBuf checks the envelope validity of the BSON document. + unsafe { Doc::new_unchecked(&self.data) } + } +} + +impl Borrow for DocBuf { + fn borrow(&self) -> &Doc { + &*self + } +} + +impl ToOwned for Doc { + type Owned = DocBuf; + + fn to_owned(&self) -> Self::Owned { + self.to_docbuf() + } +} + +/// A BSON document, referencing raw binary data stored elsewhere. This can be created from +/// a [DocBuf] or any type that contains valid BSON data, and can be referenced as a `[u8]`, +/// including static binary literals, [Vec](std::vec::Vec), or arrays. 
+/// +/// Accessing elements within the `Doc` is similar to element access in [bson::Document], +/// but as the contents are parsed during iteration, instead of at creation time, format +/// errors can happen at any time during use, instead of at creation time. +/// +/// Doc can be iterated over, yielding a Result containing key-value pairs that share the +/// borrow with the source bytes instead of allocating, when necessary. +/// +/// ``` +/// # use bson::raw::{Doc, RawError}; +/// let doc = Doc::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; +/// let mut iter = doc.into_iter(); +/// let (key, value) = iter.next().unwrap()?; +/// assert_eq!(key, "hi"); +/// assert_eq!(value.as_str(), Ok("y'all")); +/// assert!(iter.next().is_none()); +/// # Ok::<(), RawError>(()) +/// ``` +/// +/// Individual elements can be accessed using [`doc.get(&key)`](Doc::get), or any of +/// the `get_*` methods, like [`doc.get_object_id(&key)`](Doc::get_object_id), and +/// [`doc.get_str(&str)`](Doc::get_str). Accessing elements is an O(N) operation, +/// as it requires iterating through the document from the beginning to find the requested +/// key. 
+/// +/// ``` +/// # use bson::raw::{DocBuf, RawError}; +/// let docbuf = DocBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// assert_eq!(docbuf.get_str("hi")?, Some("y'all")); +/// # Ok::<(), RawError>(()) +/// ``` +#[derive(Debug)] +pub struct Doc { + data: [u8], +} + +impl Doc { + pub fn new + ?Sized>(data: &D) -> RawResult<&Doc> { + let data = data.as_ref(); + if data.len() < 5 { + return Err(RawError::MalformedValue("document too short".into())); + } + let length = i32_from_slice(&data[..4]); + if data.len() as i32 != length { + return Err(RawError::MalformedValue("document length incorrect".into())); + } + if data[data.len() - 1] != 0 { + return Err(RawError::MalformedValue( + "document not null-terminated".into(), + )); + } + Ok(unsafe { Doc::new_unchecked(data) }) + } + + /// Create a new Doc referencing the provided data slice. + /// + /// # Safety + /// + /// The provided data must begin with a valid size + /// and end with a NUL-terminator. + /// + /// ``` + /// # use bson::raw::{Doc, RawError}; + /// let doc: &Doc = unsafe { Doc::new_unchecked(b"\x05\0\0\0\0") }; + /// ``` + pub unsafe fn new_unchecked + ?Sized>(data: &D) -> &Doc { + #[allow(unused_unsafe)] + unsafe { + &*(data.as_ref() as *const [u8] as *const Doc) + } + } + + /// Create a new DocBuf with an owned copy of the data in self. + /// + /// ``` + /// # use bson::raw::{Doc, RawError}; + /// use bson::raw::DocBuf; + /// let data = b"\x05\0\0\0\0"; + /// let doc = Doc::new(data)?; + /// let docbuf: DocBuf = doc.to_docbuf(); + /// # Ok::<(), RawError>(()) + pub fn to_docbuf(&self) -> DocBuf { + // SAFETY: The validity of the data is checked by self. + unsafe { DocBuf::new_unchecked(self.data.to_owned()) } + } + + /// Get an element from the document. Finding a particular key requires + /// iterating over the document from the beginning, so this is an O(N) + /// operation. + /// + /// Returns an error if the document is malformed. 
Returns `Ok(None)` + /// if the key is not found in the document. + /// + /// ``` + /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// use bson::{doc, oid::ObjectId}; + /// let docbuf = DocBuf::from_document(&doc! { + /// "_id": ObjectId::new(), + /// "f64": 2.5, + /// }); + /// let element = docbuf.get("f64")?.expect("finding key f64"); + /// assert_eq!(element.as_f64(), Ok(2.5)); + /// assert!(docbuf.get("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get<'a>(&'a self, key: &str) -> OptResult> { + for result in self.into_iter() { + let (thiskey, bson) = result?; + if thiskey == key { + return Ok(Some(bson)); + } + } + Ok(None) + } + + fn get_with<'a, T>( + &'a self, + key: &str, + f: impl FnOnce(elem::Element<'a>) -> RawResult, + ) -> OptResult { + self.get(key)?.map(f).transpose() + } + + /// Get an element from the document, and convert it to f64. + /// + /// Returns an error if the document is malformed, or if the retrieved value + /// is not an f64. Returns `Ok(None)` if the key is not found in the document. + /// + /// ``` + /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// use bson::doc; + /// let docbuf = DocBuf::from_document(&doc! { + /// "bool": true, + /// "f64": 2.5, + /// }); + /// assert_eq!(docbuf.get_f64("f64"), Ok(Some(2.5))); + /// assert_eq!(docbuf.get_f64("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(docbuf.get_f64("unknown"), Ok(None)); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_f64(&self, key: &str) -> OptResult { + self.get_with(key, elem::Element::as_f64) + } + + /// Get an element from the document, and convert it to a &str. + /// + /// The returned &str is a borrowed reference into the DocBuf. To use it + /// beyond the lifetime of self, call to_docbuf() on it. + /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not a string. Returns `Ok(None)` if the key is not found in the + /// document. 
+ /// + /// ``` + /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// use bson::doc; + /// let docbuf = DocBuf::from_document(&doc! { + /// "string": "hello", + /// "bool": true, + /// }); + /// assert_eq!(docbuf.get_str("string"), Ok(Some("hello"))); + /// assert_eq!(docbuf.get_str("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(docbuf.get_str("unknown"), Ok(None)); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_str<'a>(&'a self, key: &str) -> OptResult<&'a str> { + self.get_with(key, elem::Element::as_str) + } + + /// Get an element from the document, and convert it to a [Doc]. + /// + /// The returned [Doc] is a borrowed reference into self. To use it + /// beyond the lifetime of self, call to_owned() on it. + /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not a document. Returns `Ok(None)` if the key is not found in the + /// document. + /// + /// ``` + /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// use bson::doc; + /// let docbuf = DocBuf::from_document(&doc! { + /// "doc": { "key": "value"}, + /// "bool": true, + /// }); + /// assert_eq!(docbuf.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); + /// assert_eq!(docbuf.get_document("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(docbuf.get_document("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_document<'a>(&'a self, key: &str) -> OptResult<&'a Doc> { + self.get_with(key, elem::Element::as_document) + } + + /// Get an element from the document, and convert it to an [ArrayRef]. + /// + /// The returned [ArrayRef] is a borrowed reference into the DocBuf. + /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not a document. Returns `Ok(None)` if the key is not found in the + /// document. 
+ /// + /// ``` + /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// use bson::doc; + /// let docbuf = DocBuf::from_document(&doc! { + /// "array": [true, 3, null], + /// "bool": true, + /// }); + /// let mut arriter = docbuf.get_array("array")?.expect("finding key array").into_iter(); + /// let _: bool = arriter.next().unwrap()?.as_bool()?; + /// let _: i32 = arriter.next().unwrap()?.as_i32()?; + /// let () = arriter.next().unwrap()?.as_null()?; + /// assert!(arriter.next().is_none()); + /// assert!(docbuf.get_array("bool").is_err()); + /// assert!(docbuf.get_array("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_array<'a>(&'a self, key: &str) -> OptResult<&'a Array> { + self.get_with(key, elem::Element::as_array) + } + + /// Get an element from the document, and convert it to an [elem::RawBsonBinary]. + /// + /// The returned [RawBsonBinary](elem::RawBsonBinary) is a borrowed reference into the DocBuf. + /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not binary data. Returns `Ok(None)` if the key is not found in the + /// document. + /// + /// ``` + /// # use bson::raw::{DocBuf, elem, RawError}; + /// use bson::{doc, Binary, spec::BinarySubtype}; + /// let docbuf = DocBuf::from_document(&doc! { + /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, + /// "bool": true, + /// }); + /// assert_eq!(docbuf.get_binary("binary")?.map(elem::RawBsonBinary::as_bytes), Some(&[1, 2, 3][..])); + /// assert_eq!(docbuf.get_binary("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(docbuf.get_binary("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_binary<'a>(&'a self, key: &str) -> OptResult> { + self.get_with(key, elem::Element::as_binary) + } + + /// Get an element from the document, and convert it to a [bson::oid::ObjectId]. 
+ /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not an object ID. Returns `Ok(None)` if the key is not found in the + /// document. + /// + /// ``` + /// # use bson::raw::{DocBuf, RawError}; + /// use bson::{doc, oid::ObjectId}; + /// let docbuf = DocBuf::from_document(&doc! { + /// "_id": ObjectId::new(), + /// "bool": true, + /// }); + /// let _: ObjectId = docbuf.get_object_id("_id")?.unwrap(); + /// assert_eq!(docbuf.get_object_id("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(docbuf.get_object_id("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_object_id(&self, key: &str) -> OptResult { + self.get_with(key, elem::Element::as_object_id) + } + + /// Get an element from the document, and convert it to a [bool]. + /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not a boolean. Returns `Ok(None)` if the key is not found in the + /// document. + /// + /// ``` + /// # use bson::raw::{DocBuf, RawError}; + /// use bson::{doc, oid::ObjectId}; + /// let docbuf = DocBuf::from_document(&doc! { + /// "_id": ObjectId::new(), + /// "bool": true, + /// }); + /// assert!(docbuf.get_bool("bool")?.unwrap()); + /// assert_eq!(docbuf.get_bool("_id").unwrap_err(), RawError::UnexpectedType); + /// assert!(docbuf.get_object_id("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_bool(&self, key: &str) -> OptResult { + self.get_with(key, elem::Element::as_bool) + } + + /// Get an element from the document, and convert it to a [chrono::DateTime]. + /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not a boolean. Returns `Ok(None)` if the key is not found in the + /// document. + /// + /// ``` + /// # use bson::raw::{DocBuf, RawError}; + /// use bson::doc; + /// use chrono::{Utc, Datelike, TimeZone}; + /// let docbuf = DocBuf::from_document(&doc! 
{ + /// "created_at": Utc.ymd(2020, 3, 15).and_hms(17, 0, 0), + /// "bool": true, + /// }); + /// assert_eq!(docbuf.get_datetime("created_at")?.unwrap().year(), 2020); + /// assert_eq!(docbuf.get_datetime("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(docbuf.get_datetime("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_datetime(&self, key: &str) -> OptResult> { + self.get_with(key, elem::Element::as_datetime) + } + + /// Get an element from the document, and convert it to the `()` type. + /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not null. Returns `Ok(None)` if the key is not found in the + /// document. + /// + /// There is not much reason to use the () value, so this method mostly + /// exists for consistency with other element types, and as a way to assert + /// type of the element. + /// ``` + /// # use bson::raw::{DocBuf, RawError}; + /// use bson::doc; + /// let docbuf = DocBuf::from_document(&doc! { + /// "null": null, + /// "bool": true, + /// }); + /// docbuf.get_null("null")?.unwrap(); + /// assert_eq!(docbuf.get_null("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(docbuf.get_null("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_null(&self, key: &str) -> OptResult<()> { + self.get_with(key, elem::Element::as_null) + } + + /// Get an element from the document, and convert it to an [elem::RawBsonRegex]. + /// + /// The [RawBsonRegex](elem::RawBsonRegex) borrows data from the DocBuf. + /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not a regex. Returns `Ok(None)` if the key is not found in the + /// document. + /// ``` + /// # use bson::raw::{DocBuf, RawError, elem}; + /// use bson::{doc, Regex}; + /// let docbuf = DocBuf::from_document(&doc! 
{ + /// "regex": Regex { + /// pattern: String::from(r"end\s*$"), + /// options: String::from("i"), + /// }, + /// "bool": true, + /// }); + /// assert_eq!(docbuf.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); + /// assert_eq!(docbuf.get_regex("regex")?.unwrap().options(), "i"); + /// assert_eq!(docbuf.get_regex("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(docbuf.get_regex("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_regex<'a>(&'a self, key: &str) -> OptResult> { + self.get_with(key, elem::Element::as_regex) + } + + /// Get an element from the document, and convert it to an &str representing the + /// javascript element type. + /// + /// The &str borrows data from the DocBuf. If you need an owned copy of the data, + /// you should call .to_owned() on the result. + /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not a javascript code object. Returns `Ok(None)` if the key is not found + /// in the document. + /// ``` + /// # use bson::raw::{DocBuf, RawError, elem}; + /// use bson::{doc, Bson}; + /// let docbuf = DocBuf::from_document(&doc! { + /// "js": Bson::JavaScriptCode(String::from("console.log(\"hi y'all\");")), + /// "bool": true, + /// }); + /// assert_eq!(docbuf.get_javascript("js")?, Some("console.log(\"hi y'all\");")); + /// assert_eq!(docbuf.get_javascript("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(docbuf.get_javascript("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_javascript<'a>(&'a self, key: &str) -> OptResult<&'a str> { + self.get_with(key, elem::Element::as_javascript) + } + + /// Get an element from the document, and convert it to an &str representing the + /// symbol element type. + /// + /// The &str borrows data from the DocBuf. If you need an owned copy of the data, + /// you should call .to_owned() on the result. 
+ /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not a symbol object. Returns `Ok(None)` if the key is not found + /// in the document. + /// ``` + /// # use bson::raw::{DocBuf, RawError, elem}; + /// use bson::{doc, Bson}; + /// let docbuf = DocBuf::from_document(&doc! { + /// "symbol": Bson::Symbol(String::from("internal")), + /// "bool": true, + /// }); + /// assert_eq!(docbuf.get_symbol("symbol")?, Some("internal")); + /// assert_eq!(docbuf.get_symbol("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(docbuf.get_symbol("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_symbol<'a>(&'a self, key: &str) -> OptResult<&'a str> { + self.get_with(key, elem::Element::as_symbol) + } + + /// Get an element from the document, and extract the data as a javascript code with scope. + /// + /// The return value is a `(&str, &Doc)` where the &str represents the javascript code, + /// and the [`&Doc`](Doc) represents the scope. Both elements borrow data from the DocBuf. + /// If you need an owned copy of the data, you should call [js.to_owned()](ToOwned::to_owned) on + /// the code or [scope.to_docbuf()](Doc::to_docbuf) on the scope. + /// + /// Returns an error if the document is malformed or if the retrieved value + /// is not a javascript code with scope object. Returns `Ok(None)` if the key is not found + /// in the document. + /// ``` + /// # use bson::raw::{DocBuf, RawError, elem}; + /// use bson::{doc, JavaScriptCodeWithScope}; + /// let docbuf = DocBuf::from_document(&doc! 
{ + /// "js": JavaScriptCodeWithScope { + /// code: String::from("console.log(\"i:\", i);"), + /// scope: doc!{"i": 42}, + /// }, + /// "bool": true, + /// }); + /// let (js, scope) = docbuf.get_javascript_with_scope("js")?.unwrap(); + /// assert_eq!(js, "console.log(\"i:\", i);"); + /// assert_eq!(scope.get_i32("i")?.unwrap(), 42); + /// assert_eq!(docbuf.get_javascript_with_scope("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(docbuf.get_javascript_with_scope("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_javascript_with_scope<'a>(&'a self, key: &str) -> OptResult<(&'a str, &'a Doc)> { + self.get_with(key, elem::Element::as_javascript_with_scope) + } + + /// Get an element from the document, and convert it to i32. + /// + /// Returns an error if the document is malformed, or if the retrieved value + /// is not an i32. Returns `Ok(None)` if the key is not found in the document. + /// + /// ``` + /// # use bson::raw::{DocBuf, RawError}; + /// use bson::doc; + /// let docbuf = DocBuf::from_document(&doc! { + /// "bool": true, + /// "i32": 1_000_000, + /// }); + /// assert_eq!(docbuf.get_i32("i32"), Ok(Some(1_000_000))); + /// assert_eq!(docbuf.get_i32("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(docbuf.get_i32("unknown"), Ok(None)); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_i32(&self, key: &str) -> OptResult { + self.get_with(key, elem::Element::as_i32) + } + + /// Get an element from the document, and convert it to a timestamp. + /// + /// Returns an error if the document is malformed, or if the retrieved value + /// is not an i32. Returns `Ok(None)` if the key is not found in the document. + /// + /// ``` + /// # use bson::raw::{DocBuf, elem, RawError}; + /// use bson::{doc, Timestamp}; + /// let docbuf = DocBuf::from_document(&doc! 
{ + /// "bool": true, + /// "ts": Timestamp { time: 649876543, increment: 9 }, + /// }); + /// let timestamp = docbuf.get_timestamp("ts")?.unwrap(); + /// + /// assert_eq!(timestamp.time(), 649876543); + /// assert_eq!(timestamp.increment(), 9); + /// assert_eq!(docbuf.get_timestamp("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(docbuf.get_timestamp("unknown"), Ok(None)); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_timestamp<'a>(&'a self, key: &str) -> OptResult> { + self.get_with(key, elem::Element::as_timestamp) + } + + /// Get an element from the document, and convert it to i64. + /// + /// Returns an error if the document is malformed, or if the retrieved value + /// is not an i64. Returns `Ok(None)` if the key is not found in the document. + /// + /// ``` + /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// use bson::doc; + /// let docbuf = DocBuf::from_document(&doc! { + /// "bool": true, + /// "i64": 9223372036854775807_i64, + /// }); + /// assert_eq!(docbuf.get_i64("i64"), Ok(Some(9223372036854775807))); + /// assert_eq!(docbuf.get_i64("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(docbuf.get_i64("unknown"), Ok(None)); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_i64(&self, key: &str) -> OptResult { + self.get_with(key, elem::Element::as_i64) + } + + /// Return a reference to the contained data as a `&[u8]` + /// + /// ``` + /// # use bson::raw::DocBuf; + /// use bson::doc; + /// let docbuf = DocBuf::from_document(&doc!{}); + /// assert_eq!(docbuf.as_bytes(), b"\x05\x00\x00\x00\x00"); + /// ``` + pub fn as_bytes(&self) -> &[u8] { + &self.data + } +} + +impl AsRef for Doc { + fn as_ref(&self) -> &Doc { + self + } +} + +impl Deref for DocBuf { + type Target = Doc; + + fn deref(&self) -> &Self::Target { + // SAFETY: The validity of the data is checked when creating DocBuf. 
+ unsafe { Doc::new_unchecked(&self.data) } + } +} + +impl TryFrom<&Doc> for crate::Document { + type Error = RawError; + + fn try_from(rawdoc: &Doc) -> RawResult { + rawdoc + .into_iter() + .map(|res| res.and_then(|(k, v)| Ok((k.to_owned(), v.try_into()?)))) + .collect() + } +} + +impl<'a> IntoIterator for &'a Doc { + type IntoIter = DocIter<'a>; + type Item = RawResult<(&'a str, elem::Element<'a>)>; + + fn into_iter(self) -> DocIter<'a> { + DocIter { + doc: self, + offset: 4, + } + } +} + +pub struct DocIter<'a> { + doc: &'a Doc, + offset: usize, +} + +impl<'a> Iterator for DocIter<'a> { + type Item = RawResult<(&'a str, elem::Element<'a>)>; + + fn next(&mut self) -> Option)>> { + if self.offset == self.doc.data.len() - 1 { + if self.doc.data[self.offset] == 0 { + // end of document marker + return None; + } else { + return Some(Err(RawError::MalformedValue( + "document not null terminated".into(), + ))); + } + } + let key = match read_nullterminated(&self.doc.data[self.offset + 1..]) { + Ok(key) => key, + Err(err) => return Some(Err(err)), + }; + let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 + let element_type = match ElementType::from(self.doc.data[self.offset]) { + Some(et) => et, + None => { + return Some(Err(RawError::MalformedValue(format!( + "invalid tag: {}", + self.doc.data[self.offset] + )))) + } + }; + let element_size = match element_type { + ElementType::Double => 8, + ElementType::String => { + let size = + 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if self.doc.data[valueoffset + size - 1] != 0 { + return Some(Err(RawError::MalformedValue( + "string not null terminated".into(), + ))); + } + size + } + ElementType::EmbeddedDocument => { + let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if self.doc.data[valueoffset + size - 1] != 0 { + return Some(Err(RawError::MalformedValue( + "document not null terminated".into(), + ))); + } + size + } + 
ElementType::Array => { + let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if self.doc.data[valueoffset + size - 1] != 0 { + return Some(Err(RawError::MalformedValue( + "array not null terminated".into(), + ))); + } + size + } + ElementType::Binary => { + 5 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize + } + ElementType::Undefined => 0, + ElementType::ObjectId => 12, + ElementType::Boolean => 1, + ElementType::DateTime => 8, + ElementType::Null => 0, + ElementType::RegularExpression => { + let regex = match read_nullterminated(&self.doc.data[valueoffset..]) { + Ok(regex) => regex, + Err(err) => return Some(Err(err)), + }; + let options = + match read_nullterminated(&self.doc.data[valueoffset + regex.len() + 1..]) { + Ok(options) => options, + Err(err) => return Some(Err(err)), + }; + regex.len() + options.len() + 2 + } + ElementType::DbPointer => { + let string_size = + 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + let id_size = 12; + if self.doc.data[valueoffset + string_size - 1] != 0 { + return Some(Err(RawError::MalformedValue( + "DBPointer string not null-terminated".into(), + ))); + } + string_size + id_size + } + ElementType::JavaScriptCode => { + let size = + 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if self.doc.data[valueoffset + size - 1] != 0 { + return Some(Err(RawError::MalformedValue( + "javascript code not null-terminated".into(), + ))); + } + size + } + ElementType::Symbol => { + 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize + } + ElementType::JavaScriptCodeWithScope => { + let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if self.doc.data[valueoffset + size - 1] != 0 { + return Some(Err(RawError::MalformedValue( + "javascript with scope not null-terminated".into(), + ))); + } + size + } + ElementType::Int32 => 4, + ElementType::Timestamp => 8, + 
ElementType::Int64 => 8, + ElementType::Decimal128 => 16, + ElementType::MaxKey => 0, + ElementType::MinKey => 0, + }; + let nextoffset = valueoffset + element_size; + self.offset = nextoffset; + Some(Ok(( + key, + elem::Element::new(element_type, &self.doc.data[valueoffset..nextoffset]), + ))) + } +} + +pub type ArrayRef<'a> = &'a Array; + +pub struct Array { + doc: Doc, +} + +impl Array { + pub fn new(data: &[u8]) -> RawResult<&Array> { + Ok(Array::from_doc(Doc::new(data)?)) + } + + /// Return a new Array from the provided bytes. + /// + /// # Safety + /// + /// The provided bytes must start with a valid length indicator + /// and end with a NUL terminator, as described in [the bson + /// spec](http://bsonspec.org/spec.html). + /// + /// The following is valid: + /// ``` + /// # use bson::raw::Array; + /// // Represents the array [null, 514i32], which is the same as the document + /// // {"0": null, "1": 514} + /// let bson = b"\x0f\0\0\0\x0A0\0\x101\0\x02\x02\0\0\0"; + /// let arr = unsafe { Array::new_unchecked(bson) }; + /// let mut arriter = arr.into_iter(); + /// assert!(arriter.next().unwrap().and_then(|b| b.as_null()).is_ok()); + /// assert_eq!(arriter.next().unwrap().and_then(|b| b.as_i32()).unwrap(), 514); + /// ``` + /// + /// And so is this, even though the provided document is not an array, because + /// the errors will be caught during decode. + /// + /// ``` + /// # use bson::raw::Array; + /// // Represents the document {"0": null, "X": 514} + /// let bson = b"\x0f\0\0\0\x0A0\0\x10X\0\x02\x02\0\0\0"; + /// let arr = unsafe { Array::new_unchecked(bson) }; + /// let mut arriter = arr.into_iter(); + /// assert!(arriter.next().unwrap().and_then(|b| b.as_null()).is_ok()); + /// assert!(arriter.next().unwrap().is_err()); + /// assert!(arriter.next().is_none()); + /// ``` + /// + /// # Bad: + /// + /// The following, however, indicates the wrong size for the document, and is + /// therefore unsound. 
+ /// + /// ``` + /// # use bson::raw::Array; + /// // Contains a length indicator, that is longer than the array + /// let invalid = b"\x06\0\0\0\0"; + /// let arr: &Array = unsafe { Array::new_unchecked(invalid) }; + /// ``` + pub unsafe fn new_unchecked(data: &[u8]) -> &Array { + #[allow(unused_unsafe)] + let doc = unsafe { Doc::new_unchecked(data) }; + Array::from_doc(doc) + } + + pub fn from_doc(doc: &Doc) -> &Array { + // SAFETY: Array layout matches Doc layout + unsafe { &*(doc as *const Doc as *const Array) } + } + + pub fn get(&self, index: usize) -> OptResult> { + self.into_iter().nth(index).transpose() + } + + fn get_with<'a, T>( + &'a self, + index: usize, + f: impl FnOnce(elem::Element<'a>) -> RawResult, + ) -> OptResult { + self.get(index)?.map(f).transpose() + } + + pub fn get_f64(&self, index: usize) -> OptResult { + self.get_with(index, elem::Element::as_f64) + } + + pub fn get_str(&self, index: usize) -> OptResult<&str> { + self.get_with(index, elem::Element::as_str) + } + + pub fn get_document(&self, index: usize) -> OptResult<&Doc> { + self.get_with(index, elem::Element::as_document) + } + + pub fn get_array(&self, index: usize) -> OptResult<&Array> { + self.get_with(index, elem::Element::as_array) + } + + pub fn get_binary(&self, index: usize) -> OptResult> { + self.get_with(index, elem::Element::as_binary) + } + + pub fn get_object_id(&self, index: usize) -> OptResult { + self.get_with(index, elem::Element::as_object_id) + } + + pub fn get_bool(&self, index: usize) -> OptResult { + self.get_with(index, elem::Element::as_bool) + } + + pub fn get_datetime(&self, index: usize) -> OptResult> { + self.get_with(index, elem::Element::as_datetime) + } + + pub fn get_null(&self, index: usize) -> OptResult<()> { + self.get_with(index, elem::Element::as_null) + } + + pub fn get_regex(&self, index: usize) -> OptResult> { + self.get_with(index, elem::Element::as_regex) + } + + pub fn get_javascript(&self, index: usize) -> OptResult<&str> { + 
self.get_with(index, elem::Element::as_javascript) + } + + pub fn get_symbol(&self, index: usize) -> OptResult<&str> { + self.get_with(index, elem::Element::as_symbol) + } + + pub fn get_javascript_with_scope(&self, index: usize) -> OptResult<(&str, &Doc)> { + self.get_with(index, elem::Element::as_javascript_with_scope) + } + + pub fn get_i32(&self, index: usize) -> OptResult { + self.get_with(index, elem::Element::as_i32) + } + + pub fn get_timestamp(&self, index: usize) -> OptResult> { + self.get_with(index, elem::Element::as_timestamp) + } + + pub fn get_i64(&self, index: usize) -> OptResult { + self.get_with(index, elem::Element::as_i64) + } + + pub fn to_vec(&self) -> RawResult>> { + self.into_iter().collect() + } + + pub fn as_bytes(&self) -> &[u8] { + self.doc.as_bytes() + } +} + +impl TryFrom<&Array> for Vec { + type Error = RawError; + + fn try_from(arr: &Array) -> RawResult> { + arr.into_iter() + .map(|result| { + let rawbson = result?; + Bson::try_from(rawbson) + }) + .collect() + } +} + +impl<'a> IntoIterator for &'a Array { + type IntoIter = ArrayIter<'a>; + type Item = RawResult>; + + fn into_iter(self) -> ArrayIter<'a> { + ArrayIter { + dociter: self.doc.into_iter(), + index: 0, + } + } +} + +pub struct ArrayIter<'a> { + dociter: DocIter<'a>, + index: usize, +} + +impl<'a> Iterator for ArrayIter<'a> { + type Item = RawResult>; + + fn next(&mut self) -> Option>> { + let value = self.dociter.next().map(|result| { + let (key, bson) = match result { + Ok(value) => value, + Err(err) => return Err(err), + }; + + let index: usize = key + .parse() + .map_err(|_| RawError::MalformedValue("non-integer array index found".into()))?; + + if index == self.index { + Ok(bson) + } else { + Err(RawError::MalformedValue("wrong array index found".into())) + } + }); + self.index += 1; + value + } +} +/// Given a 4 byte u8 slice, return an i32 calculated from the bytes in +/// little endian order +/// +/// # Panics +/// +/// This function panics if given a slice that is 
not four bytes long. +fn i32_from_slice(val: &[u8]) -> i32 { + i32::from_le_bytes(val.try_into().expect("i32 is four bytes")) +} + +/// Given an 8 byte u8 slice, return an i64 calculated from the bytes in +/// little endian order +/// +/// # Panics +/// +/// This function panics if given a slice that is not eight bytes long. +fn i64_from_slice(val: &[u8]) -> i64 { + i64::from_le_bytes(val.try_into().expect("i64 is eight bytes")) +} + +/// Given a 4 byte u8 slice, return a u32 calculated from the bytes in +/// little endian order +/// +/// # Panics +/// +/// This function panics if given a slice that is not four bytes long. +fn u32_from_slice(val: &[u8]) -> u32 { + u32::from_le_bytes(val.try_into().expect("u32 is four bytes")) +} + +#[cfg(feature = "decimal128")] +fn d128_from_slice(val: &[u8]) -> Decimal128 { + // TODO: Handle Big Endian platforms + let d = + unsafe { decimal::d128::from_raw_bytes(val.try_into().expect("d128 is sixteen bytes")) }; + Decimal128::from(d) +} + +fn read_nullterminated(buf: &[u8]) -> RawResult<&str> { + let mut splits = buf.splitn(2, |x| *x == 0); + let value = splits + .next() + .ok_or_else(|| RawError::MalformedValue("no value".into()))?; + if splits.next().is_some() { + Ok(try_to_str(value)?) 
+ } else { + Err(RawError::MalformedValue("expected null terminator".into())) + } +} + +fn read_lenencoded(buf: &[u8]) -> RawResult<&str> { + let length = i32_from_slice(&buf[..4]); + assert!(buf.len() as i32 >= length + 4); + try_to_str(&buf[4..4 + length as usize - 1]) +} + +fn try_to_str(data: &[u8]) -> RawResult<&str> { + match std::str::from_utf8(data) { + Ok(s) => Ok(s), + Err(_) => Err(RawError::Utf8EncodingError(data.into())), + } +} + +pub type DocRef<'a> = &'a Doc; + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + doc, spec::BinarySubtype, Binary, Bson, JavaScriptCodeWithScope, Regex, Timestamp, + }; + use chrono::TimeZone; + + fn to_bytes(doc: &crate::Document) -> Vec { + let mut docbytes = Vec::new(); + doc.to_writer(&mut docbytes).unwrap(); + docbytes + } + + #[test] + fn string_from_document() { + let docbytes = to_bytes(&doc! { + "this": "first", + "that": "second", + "something": "else", + }); + let rawdoc = Doc::new(&docbytes).unwrap(); + assert_eq!( + rawdoc.get("that").unwrap().unwrap().as_str().unwrap(), + "second", + ); + } + + #[test] + fn nested_document() { + let docbytes = to_bytes(&doc! { + "outer": { + "inner": "surprise", + }, + }); + let rawdoc = Doc::new(&docbytes).unwrap(); + assert_eq!( + rawdoc + .get("outer") + .expect("get doc result") + .expect("get doc option") + .as_document() + .expect("as doc") + .get("inner") + .expect("get str result") + .expect("get str option") + .as_str() + .expect("as str"), + "surprise", + ); + } + + #[test] + fn iterate() { + let docbytes = to_bytes(&doc! 
{ + "apples": "oranges", + "peanut butter": "chocolate", + "easy as": {"do": 1, "re": 2, "mi": 3}, + }); + let rawdoc = Doc::new(&docbytes).expect("malformed bson document"); + let mut dociter = rawdoc.into_iter(); + let next = dociter.next().expect("no result").expect("invalid bson"); + assert_eq!(next.0, "apples"); + assert_eq!(next.1.as_str().expect("result was not a str"), "oranges"); + let next = dociter.next().expect("no result").expect("invalid bson"); + assert_eq!(next.0, "peanut butter"); + assert_eq!(next.1.as_str().expect("result was not a str"), "chocolate"); + let next = dociter.next().expect("no result").expect("invalid bson"); + assert_eq!(next.0, "easy as"); + let _doc = next.1.as_document().expect("result was a not a document"); + let next = dociter.next(); + assert!(next.is_none()); + } + + #[test] + fn rawdoc_to_doc() { + let docbytes = to_bytes(&doc! { + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, + "object_id": oid::ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), + "boolean": true, + "datetime": Utc::now(), + "null": Bson::Null, + "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), + "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), + "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), + "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), + "int32": 23i32, + "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 0 }), + "int64": 46i64, + "end": "END", + }); + + let rawdoc = Doc::new(&docbytes).expect("invalid document"); + let _doc: crate::Document = rawdoc.try_into().expect("invalid bson"); + } + + #[test] + fn f64() { + #![allow(clippy::float_cmp)] + + let rawdoc = DocBuf::from_document(&doc! 
{"f64": 2.5}); + assert_eq!( + rawdoc + .get("f64") + .expect("error finding key f64") + .expect("no key f64") + .as_f64() + .expect("result was not a f64"), + 2.5, + ); + } + + #[test] + fn string() { + let rawdoc = DocBuf::from_document(&doc! {"string": "hello"}); + + assert_eq!( + rawdoc + .get("string") + .expect("error finding key string") + .expect("no key string") + .as_str() + .expect("result was not a string"), + "hello", + ); + } + #[test] + fn document() { + let rawdoc = DocBuf::from_document(&doc! {"document": {}}); + + let doc = rawdoc + .get("document") + .expect("error finding key document") + .expect("no key document") + .as_document() + .expect("result was not a document"); + assert_eq!(&doc.data, [5, 0, 0, 0, 0].as_ref()); // Empty document + } + + #[test] + fn array() { + let rawdoc = + DocBuf::from_document(&doc! { "array": ["binary", "serialized", "object", "notation"]}); + + let array = rawdoc + .get("array") + .expect("error finding key array") + .expect("no key array") + .as_array() + .expect("result was not an array"); + assert_eq!(array.get_str(0), Ok(Some("binary"))); + assert_eq!(array.get_str(3), Ok(Some("notation"))); + assert_eq!(array.get_str(4), Ok(None)); + } + + #[test] + fn binary() { + let rawdoc = DocBuf::from_document(&doc! { + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] } + }); + let binary: elem::RawBsonBinary<'_> = rawdoc + .get("binary") + .expect("error finding key binary") + .expect("no key binary") + .as_binary() + .expect("result was not a binary object"); + assert_eq!(binary.subtype, BinarySubtype::Generic); + assert_eq!(binary.data, &[1, 2, 3]); + } + + #[test] + fn object_id() { + let rawdoc = DocBuf::from_document(&doc! 
{ + "object_id": oid::ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + }); + let oid = rawdoc + .get("object_id") + .expect("error finding key object_id") + .expect("no key object_id") + .as_object_id() + .expect("result was not an object id"); + assert_eq!(oid.to_hex(), "0102030405060708090a0b0c"); + } + + #[test] + fn boolean() { + let rawdoc = DocBuf::from_document(&doc! { + "boolean": true, + }); + + let boolean = rawdoc + .get("boolean") + .expect("error finding key boolean") + .expect("no key boolean") + .as_bool() + .expect("result was not boolean"); + + assert_eq!(boolean, true); + } + + #[test] + fn datetime() { + let rawdoc = DocBuf::from_document(&doc! { + "boolean": true, + "datetime": Utc.ymd(2000,10,31).and_hms(12, 30, 45), + }); + let datetime = rawdoc + .get("datetime") + .expect("error finding key datetime") + .expect("no key datetime") + .as_datetime() + .expect("result was not datetime"); + assert_eq!(datetime.to_rfc3339(), "2000-10-31T12:30:45+00:00"); + } + + #[test] + fn null() { + let rawdoc = DocBuf::from_document(&doc! { + "null": null, + }); + let () = rawdoc + .get("null") + .expect("error finding key null") + .expect("no key null") + .as_null() + .expect("was not null"); + } + + #[test] + fn regex() { + let rawdoc = DocBuf::from_document(&doc! { + "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), + }); + let regex = rawdoc + .get("regex") + .expect("error finding key regex") + .expect("no key regex") + .as_regex() + .expect("was not regex"); + assert_eq!(regex.pattern, r"end\s*$"); + assert_eq!(regex.options, "i"); + } + #[test] + fn javascript() { + let rawdoc = DocBuf::from_document(&doc! 
{ + "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), + }); + let js = rawdoc + .get("javascript") + .expect("error finding key javascript") + .expect("no key javascript") + .as_javascript() + .expect("was not javascript"); + assert_eq!(js, "console.log(console);"); + } + + #[test] + fn symbol() { + let rawdoc = DocBuf::from_document(&doc! { + "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), + }); + + let symbol = rawdoc + .get("symbol") + .expect("error finding key symbol") + .expect("no key symbol") + .as_symbol() + .expect("was not symbol"); + assert_eq!(symbol, "artist-formerly-known-as"); + } + + #[test] + fn javascript_with_scope() { + let rawdoc = DocBuf::from_document(&doc! { + "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), + }); + let (js, scopedoc) = rawdoc + .get("javascript_with_scope") + .expect("error finding key javascript_with_scope") + .expect("no key javascript_with_scope") + .as_javascript_with_scope() + .expect("was not javascript with scope"); + assert_eq!(js, "console.log(msg);"); + let (scope_key, scope_value_bson) = scopedoc + .into_iter() + .next() + .expect("no next value in scope") + .expect("invalid element"); + assert_eq!(scope_key, "ok"); + let scope_value = scope_value_bson.as_bool().expect("not a boolean"); + assert_eq!(scope_value, true); + } + + #[test] + fn int32() { + let rawdoc = DocBuf::from_document(&doc! { + "int32": 23i32, + }); + let int32 = rawdoc + .get("int32") + .expect("error finding key int32") + .expect("no key int32") + .as_i32() + .expect("was not int32"); + assert_eq!(int32, 23i32); + } + + #[test] + fn timestamp() { + let rawdoc = DocBuf::from_document(&doc! 
{ + "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 7 }), + }); + let ts = rawdoc + .get("timestamp") + .expect("error finding key timestamp") + .expect("no key timestamp") + .as_timestamp() + .expect("was not a timestamp"); + + assert_eq!(ts.increment(), 7); + assert_eq!(ts.time(), 3542578); + } + + #[test] + fn int64() { + let rawdoc = DocBuf::from_document(&doc! { + "int64": 46i64, + }); + let int64 = rawdoc + .get("int64") + .expect("error finding key int64") + .expect("no key int64") + .as_i64() + .expect("was not int64"); + assert_eq!(int64, 46i64); + } + #[test] + fn document_iteration() { + let docbytes = to_bytes(&doc! { + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, + "object_id": oid::ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), + "boolean": true, + "datetime": Utc::now(), + "null": Bson::Null, + "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), + "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), + "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), + "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), + "int32": 23i32, + "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 0 }), + "int64": 46i64, + "end": "END", + }); + let rawdoc = unsafe { Doc::new_unchecked(&docbytes) }; + + assert_eq!( + rawdoc + .into_iter() + .collect::, RawError>>() + .expect("collecting iterated doc") + .len(), + 17 + ); + let end = rawdoc + .get("end") + .expect("error finding key end") + .expect("no key end") + .as_str() + .expect("was not str"); + assert_eq!(end, "END"); + } + + #[test] + fn into_bson_conversion() { + let docbytes = to_bytes(&doc! 
{ + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "object_id": oid::ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, + "boolean": false, + }); + let rawbson = elem::Element::new(ElementType::EmbeddedDocument, &docbytes); + let b: Bson = rawbson.try_into().expect("invalid bson"); + let doc = b.as_document().expect("not a document"); + assert_eq!(*doc.get("f64").expect("f64 not found"), Bson::Double(2.5)); + assert_eq!( + *doc.get("string").expect("string not found"), + Bson::String(String::from("hello")) + ); + assert_eq!( + *doc.get("document").expect("document not found"), + Bson::Document(doc! {}) + ); + assert_eq!( + *doc.get("array").expect("array not found"), + Bson::Array( + vec!["binary", "serialized", "object", "notation"] + .into_iter() + .map(|s| Bson::String(String::from(s))) + .collect() + ) + ); + assert_eq!( + *doc.get("object_id").expect("object_id not found"), + Bson::ObjectId(oid::ObjectId::with_bytes([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 + ])) + ); + assert_eq!( + *doc.get("binary").expect("binary not found"), + Bson::Binary(Binary { + subtype: BinarySubtype::Generic, + bytes: vec![1, 2, 3] + }) + ); + assert_eq!( + *doc.get("boolean").expect("boolean not found"), + Bson::Boolean(false) + ); + } +} + +#[cfg(test)] +mod proptests { + use proptest::prelude::*; + use std::convert::TryInto; + + use super::props::arbitrary_bson; + use super::DocBuf; + use crate::doc; + + fn to_bytes(doc: &crate::Document) -> Vec { + let mut docbytes = Vec::new(); + doc.to_writer(&mut docbytes).unwrap(); + docbytes + } + + proptest! 
{ + #[test] + fn no_crashes(s: Vec) { + let _ = DocBuf::new(s); + } + + #[test] + fn roundtrip_bson(bson in arbitrary_bson()) { + println!("{:?}", bson); + let doc = doc!{"bson": bson}; + let raw = to_bytes(&doc); + let raw = DocBuf::new(raw); + prop_assert!(raw.is_ok()); + let raw = raw.unwrap(); + let roundtrip: Result = raw.try_into(); + prop_assert!(roundtrip.is_ok()); + let roundtrip = roundtrip.unwrap(); + prop_assert_eq!(doc, roundtrip); + } + } +} diff --git a/src/raw/props.rs b/src/raw/props.rs new file mode 100644 index 00000000..23ea21fd --- /dev/null +++ b/src/raw/props.rs @@ -0,0 +1,59 @@ +use crate::{spec::BinarySubtype, Binary, Bson, Document, JavaScriptCodeWithScope, Regex}; + +use proptest::prelude::*; + +fn arbitrary_binary_subtype() -> impl Strategy { + prop_oneof![ + Just(BinarySubtype::Generic), + Just(BinarySubtype::Function), + Just(BinarySubtype::BinaryOld), + Just(BinarySubtype::UuidOld), + Just(BinarySubtype::Uuid), + Just(BinarySubtype::Md5), + ] +} + +pub(crate) fn arbitrary_bson() -> impl Strategy { + let leaf = prop_oneof![ + Just(Bson::Null), + any::().prop_map(Bson::String), + any::().prop_map(Bson::Boolean), + any::().prop_map(Bson::Double), + any::().prop_map(Bson::Int32), + any::().prop_map(Bson::Int64), + any::<(String, String)>() + .prop_map(|(pattern, options)| Bson::RegularExpression(Regex { pattern, options })), + any::<[u8; 12]>().prop_map(|bytes| Bson::ObjectId(crate::oid::ObjectId::with_bytes(bytes))), + (arbitrary_binary_subtype(), any::>()).prop_map(|(subtype, bytes)| { + let bytes = if let BinarySubtype::BinaryOld = subtype { + // BinarySubtype::BinaryOld expects a four byte prefix, which the bson::Bson type + // leaves up to the caller. 
+ + let mut newbytes = Vec::with_capacity(bytes.len() + 4); + newbytes.extend_from_slice(&(bytes.len() as i32).to_le_bytes()); + newbytes.extend_from_slice(&bytes); + newbytes + } else { + bytes + }; + Bson::Binary(Binary { subtype, bytes }) + }), + any::().prop_map(Bson::JavaScriptCode), + ]; + + leaf.prop_recursive(4, 256, 10, |inner| { + prop_oneof![ + prop::collection::hash_map("[^\0]*", inner.clone(), 0..12) + .prop_map(|map| Bson::Document(map.into_iter().collect())), + prop::collection::vec(inner.clone(), 0..12).prop_map(Bson::Array), + ( + prop::collection::hash_map("[^\0]*", inner, 0..12) + .prop_map(|map| map.into_iter().collect::()), + any::() + ) + .prop_map(|(scope, code)| Bson::JavaScriptCodeWithScope( + JavaScriptCodeWithScope { code, scope } + )), + ] + }) +} From 56299b35430392005959106747203a2f003ca1e0 Mon Sep 17 00:00:00 2001 From: Saghm Rossi Date: Wed, 10 Feb 2021 13:33:51 -0500 Subject: [PATCH 02/48] RUST-284 Updates to raw BSON API, implementation, and documentation --- src/raw/de.rs | 1074 ------------------------------ src/raw/de/binary.rs | 201 ------ src/raw/de/datetime.rs | 162 ----- src/raw/de/js.rs | 229 ------- src/raw/de/object_id.rs | 146 ----- src/raw/de/regex.rs | 209 ------ src/raw/elem.rs | 177 ++--- src/raw/mod.rs | 1383 +++++++++++++++++---------------------- 8 files changed, 686 insertions(+), 2895 deletions(-) delete mode 100644 src/raw/de.rs delete mode 100644 src/raw/de/binary.rs delete mode 100644 src/raw/de/datetime.rs delete mode 100644 src/raw/de/js.rs delete mode 100644 src/raw/de/object_id.rs delete mode 100644 src/raw/de/regex.rs diff --git a/src/raw/de.rs b/src/raw/de.rs deleted file mode 100644 index 8c58b8c3..00000000 --- a/src/raw/de.rs +++ /dev/null @@ -1,1074 +0,0 @@ -use serde::de::{self, DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}; -use serde::forward_to_deserialize_any; -use serde::Deserialize; - -use std::convert::TryInto; -use std::fmt::Debug; -use std::num::TryFromIntError; - -use 
crate::raw::{elem::Element, ArrayIter, Doc, DocBuf, DocIter, RawError}; -use crate::spec::ElementType; - -use object_id::RawObjectIdDeserializer; - -pub mod binary; -pub mod datetime; -pub mod js; -pub mod object_id; -pub mod regex; - -#[derive(Debug)] -pub enum Error { - Eof, - TrailingData(Vec), - EncodingError, - MalformedDocument, - UnexpectedType, - Unimplemented, - IntConversion(TryFromIntError), - Internal(String), - NotFound, - TmPErroR, -} - -impl From for Error { - fn from(err: TryFromIntError) -> Error { - Error::IntConversion(err) - } -} - -impl std::error::Error for Error {} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{:?}", self) - } -} - -impl de::Error for Error { - fn custom(err: T) -> Error { - Error::Internal(format!("{}", err)) - } -} - -impl From for Error { - fn from(val: RawError) -> Error { - match val { - RawError::Utf8EncodingError(_) => Error::EncodingError, - RawError::UnexpectedType => Error::UnexpectedType, - RawError::MalformedValue(_) => Error::MalformedDocument, - } - } -} - -pub struct BsonDeserializer<'de> { - bson: Element<'de>, -} - -impl<'de> BsonDeserializer<'de> { - #[deprecated(since = "0.2.0", note = "use from_doc(&docref) instead")] - pub fn from_docref(doc: &'de Doc) -> Self { - BsonDeserializer::from_rawbson(Element::new(ElementType::EmbeddedDocument, doc.as_bytes())) - } - - pub fn from_doc(doc: &'de Doc) -> Self { - BsonDeserializer::from_rawbson(Element::new(ElementType::EmbeddedDocument, doc.as_bytes())) - } - - pub fn from_rawbson(bson: Element<'de>) -> Self { - BsonDeserializer { bson } - } -} - -#[deprecated(since = "0.2.0", note = "use from_doc(&docbuf) instead")] -pub fn from_docbuf<'de, T>(rawdoc_buf: &'de DocBuf) -> Result -where - T: Deserialize<'de>, -{ - from_doc(rawdoc_buf) -} - -pub fn from_doc<'de, T>(rawdoc: &'de Doc) -> Result -where - T: Deserialize<'de>, -{ - let mut de = BsonDeserializer::from_doc(rawdoc); - 
T::deserialize(&mut de) -} - -#[deprecated(since = "0.2.0", note = "use from_doc(&docref) instead")] -pub fn from_docref<'de, T>(rawdoc: &'de Doc) -> Result -where - T: Deserialize<'de>, -{ - from_doc(rawdoc) -} - -pub fn from_bytes<'de, T: 'de>(data: &'de [u8]) -> Result -where - T: Deserialize<'de>, -{ - let raw_document = Doc::new(data)?; - from_doc(raw_document) -} - -impl<'a, 'de: 'a> Deserializer<'de> for &'a mut BsonDeserializer<'de> { - type Error = Error; - - fn deserialize_any>(self, visitor: V) -> Result { - match self.bson.element_type() { - ElementType::Double => self.deserialize_f64(visitor), - ElementType::String => self.deserialize_str(visitor), - ElementType::EmbeddedDocument => self.deserialize_map(visitor), - ElementType::Array => self.deserialize_seq(visitor), - ElementType::Binary => self.deserialize_bytes(visitor), - ElementType::Undefined => self.deserialize_unit(visitor), - ElementType::ObjectId => { - self.deserialize_struct(object_id::NAME, object_id::FIELDS, visitor) - } - ElementType::Boolean => self.deserialize_bool(visitor), - ElementType::DateTime => { - self.deserialize_struct(datetime::NAME, datetime::FIELDS, visitor) - } - ElementType::Null => self.deserialize_unit(visitor), - ElementType::DbPointer => Err(Error::Unimplemented), // deserialize (&str, ObjectId), or struct - ElementType::RegularExpression => { - self.deserialize_struct(regex::NAME, regex::FIELDS, visitor) - } - ElementType::JavaScriptCode => self.deserialize_str(visitor), - ElementType::Symbol => self.deserialize_str(visitor), - ElementType::JavaScriptCodeWithScope => { - self.deserialize_struct(js::WITH_SCOPE_NAME, js::WITH_SCOPE_FIELDS, visitor) - } // deserialize (&'str, Map) or struct - ElementType::Int32 => self.deserialize_i32(visitor), - ElementType::Timestamp => self.deserialize_u64(visitor), - ElementType::Int64 => self.deserialize_i64(visitor), - ElementType::MinKey => self.deserialize_unit(visitor), - ElementType::MaxKey => self.deserialize_unit(visitor), 
- ElementType::Decimal128 => self.deserialize_i128(visitor), - } - } - - fn deserialize_bool>(self, visitor: V) -> Result { - visitor.visit_bool(self.bson.as_bool()?) - } - - #[cfg(feature = "u2i")] - fn deserialize_u8>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Int32 => self.bson.as_i32()?.try_into()?, - ElementType::Int64 => self.bson.as_i64()?.try_into()?, - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_u8(val) - } - - fn deserialize_i8>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Int32 => self.bson.as_i32()?.try_into()?, - ElementType::Int64 => self.bson.as_i64()?.try_into()?, - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_i8(val) - } - - #[cfg(feature = "u2i")] - fn deserialize_u16>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Int32 => self.bson.as_i32()?.try_into()?, - ElementType::Int64 => self.bson.as_i64()?.try_into()?, - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_u16(val) - } - fn deserialize_i16>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Int32 => self.bson.as_i32()?.try_into()?, - ElementType::Int64 => self.bson.as_i64()?.try_into()?, - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_i16(val) - } - - #[cfg(feature = "u2i")] - fn deserialize_u32>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Int32 => self.bson.as_i32()?.try_into()?, - ElementType::Int64 => self.bson.as_i64()?.try_into()?, - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_u32(val) - } - - fn deserialize_i32>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Int32 => self.bson.as_i32()?, - ElementType::Int64 => self.bson.as_i64()?.try_into()?, - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_i32(val) - } - - #[cfg(feature = "u2i")] - fn 
deserialize_u64>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Int32 => self.bson.as_i32()?.try_into()?, - ElementType::Int64 => self.bson.as_i64()?.try_into()?, - ElementType::Timestamp => self.bson.as_timestamp()?, - ElementType::DateTime => self - .bson - .as_utc_date_time()? - .timestamp_millis() - .try_into()?, - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_u64(val) - } - - fn deserialize_i64>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Int32 => self.bson.as_i32()?.into(), - ElementType::Int64 => self.bson.as_i64()?, - ElementType::DateTime => self.bson.as_datetime()?.timestamp_millis(), - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_i64(val) - } - - fn deserialize_i128>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Int32 => self.bson.as_i32()?.into(), - ElementType::Int64 => self.bson.as_i64()?.into(), - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_i128(val) - } - - #[cfg(feature = "u2i")] - fn deserialize_u128>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Int32 => self.bson.as_i32()?.try_into()?, - ElementType::Int64 => self.bson.as_i64()?.try_into()?, - ElementType::Timestamp => self.bson.as_timestamp()?.into(), - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_u128(val) - } - - #[cfg(not(feature = "u2i"))] - fn deserialize_u8>(self, _visitor: V) -> Result { - Err(Error::MalformedDocument) - } - - #[cfg(not(feature = "u2i"))] - fn deserialize_u16>(self, _visitor: V) -> Result { - Err(Error::MalformedDocument) - } - - #[cfg(not(feature = "u2i"))] - fn deserialize_u32>(self, _visitor: V) -> Result { - Err(Error::MalformedDocument) - } - - #[cfg(not(feature = "u2i"))] - fn deserialize_u64>(self, visitor: V) -> Result { - let val = match self.bson.element_type() { - ElementType::Timestamp => self.bson.as_timestamp()?.time() 
as u64, // TODO: Proper Timestamp handling - ElementType::Int64 => self.bson.as_i64()?.try_into()?, - _ => return Err(Error::UnexpectedType), - }; - visitor.visit_u64(val) - } - - fn deserialize_f32>(self, visitor: V) -> Result { - visitor.visit_f64(self.bson.as_f64()?) - } - - fn deserialize_f64>(self, visitor: V) -> Result { - visitor.visit_f64(self.bson.as_f64()?) - } - - fn deserialize_char>(self, visitor: V) -> Result { - let s = self.bson.as_str()?; - let mut chars = s.chars(); - let char = match chars.next() { - Some(char) => char, - None => return Err(Error::UnexpectedType), - }; - if chars.next().is_none() { - visitor.visit_char(char) - } else { - // Got multiple characters. - Err(Error::UnexpectedType) - } - } - - fn deserialize_str>(self, visitor: V) -> Result { - match self.bson.element_type() { - ElementType::String => visitor.visit_borrowed_str(self.bson.as_str()?), - ElementType::JavaScriptCode => visitor.visit_borrowed_str(self.bson.as_javascript()?), - ElementType::Symbol => visitor.visit_borrowed_str(self.bson.as_symbol()?), - - _ => Err(Error::MalformedDocument), - } - } - - fn deserialize_string>(self, visitor: V) -> Result { - match self.bson.element_type() { - ElementType::String => visitor.visit_str(self.bson.as_str()?), - ElementType::JavaScriptCode => visitor.visit_str(self.bson.as_javascript()?), - ElementType::Symbol => visitor.visit_str(self.bson.as_symbol()?), - ElementType::ObjectId => visitor.visit_str(&self.bson.as_object_id()?.to_hex()), - _ => Err(Error::Unimplemented), - } - } - - fn deserialize_bytes>(self, visitor: V) -> Result { - match self.bson.element_type() { - ElementType::String => { - let raw_data = self.bson.as_bytes(); - let len = i32::from_le_bytes(raw_data[0..4].try_into().expect("i32 needs 4 bytes")); - assert_eq!(raw_data.len(), len as usize + 4); - visitor.visit_borrowed_bytes(&raw_data[4..]) - } - ElementType::Binary => { - let binary = self.bson.as_binary().expect("was not binary"); - let deserializer = 
binary::BinaryDeserializer::new(binary); - deserializer.deserialize_bytes(visitor) - } - ElementType::Symbol => { - let raw_data = self.bson.as_bytes(); - let len = i32::from_le_bytes(raw_data[0..4].try_into().expect("i32 needs 4 bytes")); - assert_eq!(raw_data.len(), len as usize + 4); - visitor.visit_borrowed_bytes(&raw_data[4..]) - } - ElementType::ObjectId => visitor.visit_borrowed_bytes(self.bson.as_bytes()), - _ => Err(Error::MalformedDocument), - } - } - - fn deserialize_byte_buf>(self, visitor: V) -> Result { - match self.bson.element_type() { - ElementType::String => { - let raw_data = self.bson.as_bytes(); - let len = i32::from_le_bytes(raw_data[0..4].try_into().expect("i32 needs 4 bytes")); - assert_eq!(raw_data.len(), len as usize + 4); - visitor.visit_bytes(&raw_data[4..]) - } - ElementType::Binary => { - let binary = self.bson.as_binary()?; - let deserializer = binary::BinaryDeserializer::new(binary); - deserializer.deserialize_byte_buf(visitor) - } - ElementType::Symbol => { - let raw_data = self.bson.as_bytes(); - let len = i32::from_le_bytes(raw_data[0..4].try_into().expect("i32 needs 4 bytes")); - assert_eq!(raw_data.len(), len as usize + 4); - visitor.visit_bytes(&raw_data[4..]) - } - ElementType::ObjectId => visitor.visit_bytes(self.bson.as_bytes()), - _ => Err(Error::MalformedDocument), - } - } - - fn deserialize_option>(self, visitor: V) -> Result { - match self.bson.element_type() { - ElementType::Null => visitor.visit_none(), - _ => visitor.visit_some(self), - } - } - - fn deserialize_unit>(self, visitor: V) -> Result { - match self.bson.element_type() { - ElementType::Null => visitor.visit_unit(), - _ => Err(Error::MalformedDocument), - } - } - - fn deserialize_unit_struct>( - self, - _name: &str, - visitor: V, - ) -> Result { - self.deserialize_unit(visitor) - } - - fn deserialize_newtype_struct>( - self, - _name: &str, - visitor: V, - ) -> Result { - visitor.visit_newtype_struct(self) - } - - fn deserialize_seq>(self, visitor: V) -> 
Result { - match self.bson.element_type() { - ElementType::Array => { - let arr = self.bson.as_array()?; - let sequencer = BsonArraySequencer::new(arr.into_iter()); - visitor.visit_seq(sequencer) - } - ElementType::ObjectId => self.deserialize_byte_buf(visitor), - _ => Err::(Error::Unimplemented), - } - } - - fn deserialize_map>(self, visitor: V) -> Result { - match self.bson.element_type() { - ElementType::EmbeddedDocument => { - let doc = self.bson.as_document()?; - let mapper = BsonDocumentMap::new(doc.into_iter()); - visitor.visit_map(mapper) - } - ElementType::ObjectId => { - let mapper = RawObjectIdDeserializer::new(self.bson); - visitor.visit_map(mapper) - } - et => { - println!("Map of {:?}", et); - Err(Error::TmPErroR) - } - } - } - - fn deserialize_tuple>( - self, - len: usize, - visitor: V, - ) -> Result { - match self.bson.element_type() { - ElementType::Array => self.deserialize_seq(visitor), - ElementType::JavaScriptCodeWithScope => { - js::JavaScriptWithScopeDeserializer::new(self.bson.as_javascript_with_scope()?) 
- .deserialize_tuple(len, visitor) - } - ElementType::RegularExpression => { - regex::RegexDeserializer::new(self.bson.as_regex()?).deserialize_tuple(len, visitor) - } - - _ => Err(Error::TmPErroR), - } - } - - fn deserialize_tuple_struct>( - self, - _name: &str, - len: usize, - visitor: V, - ) -> Result { - self.deserialize_tuple(len, visitor) - } - - fn deserialize_struct>( - self, - name: &'static str, - fields: &'static [&'static str], - visitor: V, - ) -> Result { - if name == object_id::NAME { - object_id::RawObjectIdDeserializer::new(self.bson) - .deserialize_struct(name, fields, visitor) - } else if name == binary::NAME { - self.bson - .as_binary() - .map_err(Error::from) - .map(binary::BinaryDeserializer::new) - .and_then(|de| de.deserialize_struct(name, fields, visitor)) - } else if name == datetime::NAME { - self.bson - .as_datetime() - .map_err(Error::from) - .map(|dt| dt.timestamp_millis()) - .map(datetime::DateTimeDeserializer::new) - .and_then(|de| de.deserialize_struct(name, fields, visitor)) - } else if name == js::WITH_SCOPE_NAME { - self.bson - .as_javascript_with_scope() - .map_err(Error::from) - .map(js::JavaScriptWithScopeDeserializer::new) - .and_then(|de| de.deserialize_struct(name, fields, visitor)) - } else if name == regex::NAME { - self.bson - .as_regex() - .map_err(Error::from) - .map(regex::RegexDeserializer::new) - .and_then(|de| de.deserialize_struct(name, fields, visitor)) - } else { - self.deserialize_map(visitor) - } - } - - fn deserialize_enum>( - self, - _name: &str, - _fields: &[&str], - _visitor: V, - ) -> Result { - Err(Error::Unimplemented) - } - - fn deserialize_ignored_any>(self, visitor: V) -> Result { - visitor.visit_unit() - } - - fn deserialize_identifier>(self, visitor: V) -> Result { - self.deserialize_str(visitor) - } -} - -struct BsonArraySequencer<'de> { - arr_iter: ArrayIter<'de>, -} - -impl<'de> BsonArraySequencer<'de> { - fn new(arr_iter: ArrayIter<'de>) -> Self { - BsonArraySequencer { arr_iter } - } -} - 
-impl<'de> SeqAccess<'de> for BsonArraySequencer<'de> { - type Error = Error; - - fn next_element_seed(&mut self, seed: E) -> Result, Self::Error> - where - E: DeserializeSeed<'de>, - { - match self.arr_iter.next() { - Some(Ok(bson)) => { - let mut deserializer = BsonDeserializer::from_rawbson(bson); - seed.deserialize(&mut deserializer).map(Some) - } - Some(Err(err)) => Err(err.into()), - None => Ok(None), - } - } -} - -struct BsonDocumentMap<'de> { - doc_iter: DocIter<'de>, - next: Option>, -} - -impl<'de> BsonDocumentMap<'de> { - fn new(doc_iter: DocIter<'de>) -> Self { - BsonDocumentMap { - doc_iter, - next: None, - } - } -} - -impl<'de> MapAccess<'de> for BsonDocumentMap<'de> { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result, Error> - where - K: DeserializeSeed<'de>, - { - match self.doc_iter.next() { - Some(Ok((key, value))) => { - self.next = Some(value); - let deserializer = StrDeserializer::new(key); - Ok(Some(seed.deserialize(deserializer)?)) - } - Some(Err(err)) => Err(err.into()), - None => Ok(None), - } - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: DeserializeSeed<'de>, - { - let bson = self.next.take().ok_or(Error::Eof)?; - let mut deserializer = BsonDeserializer::from_rawbson(bson); - seed.deserialize(&mut deserializer) - } -} - -struct StrDeserializer<'a> { - value: &'a str, -} - -impl<'a> StrDeserializer<'a> { - fn new(value: &'a str) -> StrDeserializer<'a> { - StrDeserializer { value } - } -} - -impl<'de> Deserializer<'de> for StrDeserializer<'de> { - type Error = Error; - - fn deserialize_any>(self, visitor: V) -> Result { - visitor.visit_borrowed_str(self.value) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} - -#[cfg(test)] -mod tests { - use std::collections::HashMap; - - use crate::{doc, oid::ObjectId, Bson, DateTime}; 
- use crate::{spec::BinarySubtype, Binary, JavaScriptCodeWithScope}; - use chrono::Utc; - use serde::Deserialize; - - use crate::raw::{Doc, DocBuf}; - use super::{from_bytes, from_doc}; - - mod uuid { - use std::convert::TryInto; - use std::fmt; - - use serde::de::Visitor; - use serde::de::{Deserialize, MapAccess}; - use serde::Deserializer; - - use crate::spec::BinarySubtype; - - #[derive(Clone, Debug, Eq, PartialEq)] - pub(super) struct Uuid { - data: Vec, - } - - impl Uuid { - pub fn new(data: Vec) -> Uuid { - Uuid { data } - } - } - - impl<'de> Deserialize<'de> for Uuid { - fn deserialize(deserializer: D) -> Result>::Error> - where - D: Deserializer<'de>, - { - struct UuidVisitor; - - impl<'de> Visitor<'de> for UuidVisitor { - type Value = Uuid; - - fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - formatter.write_str("a bson uuid") - } - - fn visit_map(self, mut map: M) -> Result - where - M: MapAccess<'de>, - { - let subtype_key = map.next_key::()?; - if subtype_key.map(|dk| dk.key) != Some(super::super::binary::SUBTYPE_FIELD) - { - return Err(serde::de::Error::custom( - "BinarySubtypeKey not found in synthesized struct", - )); - } - - let subtype_value: BinarySubtypeFromU8 = map.next_value()?; - match subtype_value.subtype { - BinarySubtype::Uuid | BinarySubtype::UuidOld => {} - _ => { - return Err(serde::de::Error::custom( - "Expected binary subtype of Uuid (4) or UuidOld (3)", - )) - } - } - - let data_key = map.next_key::()?; - - if data_key.map(|dk| dk.key) != Some(super::super::binary::DATA_FIELD) { - return Err(serde::de::Error::custom( - "BinaryDataKey not found in synthesized struct", - )); - } - let data_value: BinaryDataFromBytes = map.next_value()?; - Ok(Uuid { - data: data_value.data, - }) - } - } - static FIELDS: [&str; 2] = [ - super::super::binary::SUBTYPE_FIELD, - super::super::binary::DATA_FIELD, - ]; - deserializer.deserialize_struct(super::super::binary::NAME, &FIELDS, UuidVisitor) - } - } - - struct FieldKey { - 
key: &'static str, - } - - impl FieldKey { - fn new(key: &'static str) -> FieldKey { - FieldKey { key } - } - } - - impl<'de> Deserialize<'de> for FieldKey { - fn deserialize(deserializer: D) -> Result>::Error> - where - D: Deserializer<'de>, - { - struct KeyVisitor; - - impl<'de> Visitor<'de> for KeyVisitor { - type Value = FieldKey; - - fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - formatter.write_str("an identifier") - } - - fn visit_str(self, s: &str) -> Result { - use super::super::binary::{DATA_FIELD, SUBTYPE_FIELD}; - if s == SUBTYPE_FIELD { - Ok(FieldKey::new(SUBTYPE_FIELD)) - } else if s == DATA_FIELD { - Ok(FieldKey::new(DATA_FIELD)) - } else { - Err(serde::de::Error::custom(format!("unexpected field: {}", s))) - } - } - } - - deserializer.deserialize_identifier(KeyVisitor) - } - } - - struct BinarySubtypeFromU8 { - subtype: BinarySubtype, - } - - impl BinarySubtypeFromU8 { - fn new(subtype_byte: u8) -> BinarySubtypeFromU8 { - let subtype = BinarySubtype::from(subtype_byte); - BinarySubtypeFromU8 { subtype } - } - } - - impl<'de> Deserialize<'de> for BinarySubtypeFromU8 { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct SubtypeVisitor; - - impl<'de> Visitor<'de> for SubtypeVisitor { - type Value = BinarySubtypeFromU8; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a u8 representing a binary subtype") - } - - fn visit_u8( - self, - byte: u8, - ) -> Result { - Ok(BinarySubtypeFromU8::new(byte)) - } - fn visit_i32( - self, - int: i32, - ) -> Result { - Ok(BinarySubtypeFromU8::new( - int.try_into().map_err(|_| E::custom("non-byte integer"))?, - )) - } - } - - deserializer.deserialize_u8(SubtypeVisitor) - } - } - - struct BinaryDataFromBytes { - data: Vec, - } - - impl BinaryDataFromBytes { - fn new(data: Vec) -> BinaryDataFromBytes { - BinaryDataFromBytes { data } - } - } - - impl<'de> Deserialize<'de> for BinaryDataFromBytes { - fn 
deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct DataVisitor; - - impl<'de> Visitor<'de> for DataVisitor { - type Value = BinaryDataFromBytes; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("bytes") - } - - fn visit_bytes( - self, - bytes: &[u8], - ) -> Result { - Ok(BinaryDataFromBytes::new(bytes.to_vec())) - } - } - - deserializer.deserialize_bytes(DataVisitor) - } - } - } - - #[derive(Debug, Deserialize)] - struct Person<'a> { - #[serde(rename = "_id")] - id: ObjectId, - first_name: &'a str, - middle_name: Option, - last_name: String, - number: &'a [u8], - gid: uuid::Uuid, - has_cookies: bool, - birth_year: Option, - } - - #[test] - fn deserialize_struct() { - let mut docbytes = Vec::new(); - let doc = doc! { - "_id": ObjectId::with_string("abcdefabcdefabcdefabcdef").unwrap(), - "first_name": "Edward", - "middle_name": Bson::Null, - "last_name": "Teach", - "number": Binary { subtype: BinarySubtype::Generic, bytes: vec![8, 6, 7, 5, 3, 0, 9] }, - "has_cookies": false, - "gid": Binary { subtype: BinarySubtype::Uuid, bytes: b"12345678901234567890123456789012".to_vec() }, - "birth_year": 15.0, - }; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - let p: Person = from_bytes(&docbytes).expect("could not decode into Person struct"); - assert_eq!(p.first_name, "Edward"); - assert_eq!(p.middle_name, None); - assert_eq!(p.last_name, "Teach"); - assert_eq!(p.id.to_hex(), "abcdefabcdefabcdefabcdef"); - assert_eq!(p.number, &[8, 6, 7, 5, 3, 0, 9]); - assert_eq!(p.has_cookies, false); - assert_eq!( - p.gid, - uuid::Uuid::new(b"12345678901234567890123456789012".to_vec()) - ); - assert_eq!(p.birth_year, Some(15.0)); - } - - #[test] - fn object_id() { - let object_id = ObjectId::new(); - let doc = doc! 
{ - "oid": object_id.clone(), - }; - let mut docbytes = Vec::new(); - doc.to_writer(&mut docbytes) - .expect("cannot serialize document"); - let as_object: HashMap = - from_bytes(&docbytes).expect("deserialize object_id"); - assert_eq!(as_object.get("oid").unwrap(), &object_id); - let as_string: HashMap = from_bytes(&docbytes).expect("deserialize string"); - assert_eq!(as_string.get("oid").unwrap(), &object_id.to_hex()); - let as_bytes: HashMap = - from_bytes(&docbytes).expect("deserialize borrowed bytes"); - assert_eq!(as_bytes.get("oid").unwrap(), &object_id.bytes()); - } - - #[test] - fn wrong_binary_type_for_uuid() { - let mut docbytes = Vec::new(); - let doc = &doc! { - "_id": ObjectId::with_string("abcdefabcdefabcdefabcdef").unwrap(), - "first_name": "Edward", - "last_name": "Teach", - "has cookies": true, - "number": Binary { subtype: BinarySubtype::BinaryOld, bytes: vec![7, 0, 0, 0, 8, 6, 7, 5, 3, 0, 9] }, - "gid": Binary { subtype: BinarySubtype::Function, bytes: b"12345678901234567890123456789012".to_vec() }, - }; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - - from_bytes::(&docbytes).expect_err("Should have failed to decode gid field"); - } - - #[test] - fn deserialize_map() { - let mut docbytes = Vec::new(); - let doc = doc! 
{ - "this": "that", - "three": "four", - "keymaster": "gatekeeper", - }; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - - let map: HashMap<&str, &str> = - from_bytes(&docbytes).expect("could not decode into HashMap<&str, &str>"); - assert_eq!(map.len(), 3); - assert_eq!(*map.get("this").expect("key not found"), "that"); - assert_eq!(*map.get("three").expect("key not found"), "four"); - assert_eq!(*map.get("keymaster").expect("key not found"), "gatekeeper"); - - let map: HashMap = - from_bytes(&docbytes).expect("could not decode into HashMap"); - assert_eq!(map.len(), 3); - assert_eq!(map.get("this").expect("key not found"), "that"); - assert_eq!(map.get("three").expect("key not found"), "four"); - assert_eq!(map.get("keymaster").expect("key not found"), "gatekeeper"); - } - - #[test] - fn deserialize_seq() { - let mut docbytes = Vec::new(); - let doc = doc! {"array": [1i32, 2i64, 3i32, "abc"]}; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - let map: HashMap> = - from_bytes(&docbytes).expect("could not decode into HashMap"); - assert_eq!(map.len(), 1); - let arr = map.get("array").expect("key not found"); - assert_eq!(arr.get(0).expect("no index 0"), &Bson::Int32(1)); - assert_eq!(arr.get(1).expect("no index 1"), &Bson::Int64(2)); - assert_eq!(arr.get(2).expect("no index 2"), &Bson::Int32(3)); - assert_eq!(arr.get(3).expect("no index 3"), &Bson::String("abc".into())); - assert!(arr.get(4).is_none()); - } - - #[test] - fn deserialize_js_with_scope() { - let mut docbytes = Vec::new(); - let doc = doc! 
{"js_with_scope": JavaScriptCodeWithScope { - code: String::from("console.log(value);"), - scope: doc!{"value": "Hello world"}, - }}; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - - let rawdoc = Doc::new(&docbytes).expect("Invalid document"); - assert!(rawdoc.get_javascript_with_scope("js_with_scope").is_ok()); - let map: HashMap<&str, (&str, HashMap<&str, &str>)> = - from_doc(rawdoc).expect("could not decode js with scope"); - assert_eq!( - map.get("js_with_scope").expect("no key js_with_scope").0, - "console.log(value);" - ); - assert_eq!( - map.get("js_with_scope") - .expect("no key js_with_scope") - .1 - .get("value") - .expect("no key value"), - &"Hello world", - ); - } - - #[test] - fn deserialize_regexp() { - let mut docbytes = Vec::new(); - let doc = doc! {"regex": crate::Regex { pattern: String::from("^_id$"), options: String::from("i") } }; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - let rawdoc = Doc::new(&docbytes).expect("Invalid document"); - assert!(rawdoc.get_regex("regex").is_ok()); - let map: HashMap<&str, (&str, &str)> = from_doc(rawdoc).expect("could not decode regex"); - assert_eq!(map.get("regex").expect("no key regex").0, "^_id$"); - assert_eq!(map.get("regex").expect("no key regex").1, "i"); - } - - #[test] - fn deserialize_utc_datetime_to_struct() { - #[derive(Deserialize)] - struct Dateish { - #[serde(with = "chrono::serde::ts_milliseconds")] - utc_datetime: chrono::DateTime, - } - let mut docbytes = Vec::new(); - let doc = doc! 
{"utc_datetime": Bson::DateTime(Utc::now())}; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - let rawdoc = DocBuf::new(docbytes).expect("invalid document"); - assert!(rawdoc.get_datetime("utc_datetime").is_ok()); - let value: Dateish = from_doc(&rawdoc).expect("could not decode utc_datetime"); - let elapsed = Utc::now().signed_duration_since(value.utc_datetime); - // The previous now was less than half a second ago - assert!(elapsed.num_milliseconds() >= 0); - assert!(elapsed.num_milliseconds() < 500); - } - - #[test] - fn deserialize_utc_datetime_as_chrono_datetime() { - let mut docbytes = Vec::new(); - let doc = doc! {"utc_datetime": Utc::now()}; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - let rawdoc = DocBuf::new(docbytes).expect("invalid document"); - assert!(rawdoc.get_datetime("utc_datetime").is_ok()); - let map: HashMap<&str, DateTime> = - from_doc(&rawdoc).expect("could not decode utc_datetime"); - - let dt = map.get("utc_datetime").expect("no key utc_datetime"); - println!("{:?}", dt); - let dt = dt.0; - let elapsed = Utc::now().signed_duration_since(dt); - // The previous now was less than half a second ago - assert!(elapsed.num_milliseconds() >= 0); - assert!(elapsed.num_milliseconds() < 500); - } - - #[test] - fn deserialize_object_id_as_bson() { - let mut docbytes = Vec::new(); - let doc = doc! { "object_id": ObjectId::with_string("123456123456123456123456").unwrap() }; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - let rawdoc = DocBuf::new(docbytes).expect("invalid document"); - assert!(rawdoc.get_object_id("object_id").is_ok()); - let map: HashMap<&str, Bson> = from_doc(&rawdoc).expect("could not decode object_id"); - assert_eq!( - map.get("object_id").unwrap(), - &Bson::ObjectId(ObjectId::with_string("123456123456123456123456").unwrap()) - ); - } - - #[test] - fn deserialize_utc_datetime_as_bson() { - let mut docbytes = Vec::new(); - let doc = doc! 
{"utc_datetime": Utc::now()}; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - let rawdoc = DocBuf::new(docbytes).expect("invalid document"); - assert!(rawdoc.get_datetime("utc_datetime").is_ok()); - let map: HashMap<&str, Bson> = from_doc(&rawdoc).expect("could not decode utc_datetime"); - - let dt = map.get("utc_datetime").expect("no key utc_datetime"); - let dt = dt - .as_datetime() - .expect("value was not of type Bson::DateTime"); - let elapsed = Utc::now().signed_duration_since(*dt); - // The previous now was less than half a second ago - assert!(elapsed.num_milliseconds() >= 0); - assert!(elapsed.num_milliseconds() < 500); - } - - #[test] - fn deserialize_utc_datetime_as_i64() { - let mut docbytes = Vec::new(); - let doc = doc! {"utc_datetime": Bson::DateTime(Utc::now())}; - doc.to_writer(&mut docbytes) - .expect("could not encode document"); - let rawdoc = DocBuf::new(docbytes).expect("invalid document"); - assert!(rawdoc.get_datetime("utc_datetime").is_ok()); - let map: HashMap<&str, i64> = - from_doc(&rawdoc).expect("could not decode utc_datetime as i64"); - let _time = map.get("utc_datetime").expect("no key utc_datetime"); - } -} diff --git a/src/raw/de/binary.rs b/src/raw/de/binary.rs deleted file mode 100644 index 29a1e6e9..00000000 --- a/src/raw/de/binary.rs +++ /dev/null @@ -1,201 +0,0 @@ -use serde::de::{DeserializeSeed, Deserializer, MapAccess, Visitor}; -use serde::forward_to_deserialize_any; - -use super::Error; -use crate::raw::elem::RawBsonBinary; -use crate::spec::BinarySubtype; - -pub static SUBTYPE_FIELD: &str = "$__bson_binary_subtype"; -pub static DATA_FIELD: &str = "$__bson_binary_data"; -pub static NAME: &str = "$__bson_Binary"; - -pub(super) struct BinaryDeserializer<'de> { - binary: RawBsonBinary<'de>, - visiting: Visiting, -} - -impl<'de> BinaryDeserializer<'de> { - pub(super) fn new(binary: RawBsonBinary<'de>) -> BinaryDeserializer<'de> { - BinaryDeserializer { - binary, - visiting: Visiting::New, - } - } -} - 
-impl<'de> Deserializer<'de> for BinaryDeserializer<'de> { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_bytes(visitor) - } - - fn deserialize_bytes(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_borrowed_bytes(self.binary.as_bytes()) - } - - fn deserialize_byte_buf(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_bytes(self.binary.as_bytes()) - } - - fn deserialize_map>(self, visitor: V) -> Result { - visitor.visit_map(self) - } - - fn deserialize_struct>( - self, - name: &str, - _fields: &[&str], - visitor: V, - ) -> Result { - if name == NAME { - visitor.visit_map(self) - } else { - Err(Error::MalformedDocument) - } - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} - -enum Visiting { - New, - Subtype, - Data, - Done, -} - -impl<'de> MapAccess<'de> for BinaryDeserializer<'de> { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result, Error> - where - K: DeserializeSeed<'de>, - { - match self.visiting { - Visiting::New => { - self.visiting = Visiting::Subtype; - seed.deserialize(BinaryKeyDeserializer::new(SUBTYPE_FIELD)) - .map(Some) - } - Visiting::Subtype => { - self.visiting = Visiting::Data; - seed.deserialize(BinaryKeyDeserializer::new(DATA_FIELD)) - .map(Some) - } - Visiting::Data => { - self.visiting = Visiting::Done; - Ok(None) - } - _ => Err(Error::MalformedDocument), - } - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: DeserializeSeed<'de>, - { - match self.visiting { - Visiting::Subtype => { - seed.deserialize(BinarySubtypeDeserializer::new(self.binary.subtype())) - } - Visiting::Data => seed.deserialize(BinaryDataDeserializer::new(self.binary)), - _ => Err(Error::MalformedDocument), - } - } -} - -struct BinaryKeyDeserializer { - key: 
&'static str, -} - -impl BinaryKeyDeserializer { - fn new(key: &'static str) -> BinaryKeyDeserializer { - BinaryKeyDeserializer { key } - } -} - -impl<'de> Deserializer<'de> for BinaryKeyDeserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_str(self.key) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} - -struct BinarySubtypeDeserializer { - subtype: BinarySubtype, -} - -impl BinarySubtypeDeserializer { - fn new(subtype: BinarySubtype) -> BinarySubtypeDeserializer { - BinarySubtypeDeserializer { subtype } - } -} - -impl<'de> Deserializer<'de> for BinarySubtypeDeserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let subtype: u8 = self.subtype.into(); - visitor.visit_i32(subtype as i32) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} - -struct BinaryDataDeserializer<'de> { - binary: RawBsonBinary<'de>, -} - -impl<'de> BinaryDataDeserializer<'de> { - fn new(binary: RawBsonBinary<'de>) -> BinaryDataDeserializer<'de> { - BinaryDataDeserializer { binary } - } -} - -impl<'de> Deserializer<'de> for BinaryDataDeserializer<'de> { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_borrowed_bytes(self.binary.as_bytes()) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} diff --git a/src/raw/de/datetime.rs b/src/raw/de/datetime.rs deleted file 
mode 100644 index fd170330..00000000 --- a/src/raw/de/datetime.rs +++ /dev/null @@ -1,162 +0,0 @@ -use std::convert::TryInto; - -use serde::de::{DeserializeSeed, Deserializer, MapAccess, Visitor}; -use serde::forward_to_deserialize_any; - -use super::Error; - -pub static NAME: &str = "$__bson_DateTime"; -pub static FIELD: &str = "$date"; -pub static FIELDS: &[&str] = &[FIELD]; - -struct DateTimeKeyDeserializer { - key: &'static str, -} - -impl DateTimeKeyDeserializer { - fn new(key: &'static str) -> DateTimeKeyDeserializer { - DateTimeKeyDeserializer { key } - } -} - -impl<'de> Deserializer<'de> for DateTimeKeyDeserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_str(self.key) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} - -pub struct DateTimeDeserializer { - data: i64, - visited: bool, -} - -impl DateTimeDeserializer { - pub fn new(data: i64) -> DateTimeDeserializer { - DateTimeDeserializer { - data, - visited: false, - } - } -} - -impl<'de> Deserializer<'de> for DateTimeDeserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_struct(NAME, FIELDS, visitor) - } - - fn deserialize_i64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_i64(self.data) - } - - fn deserialize_u64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_u64(self.data.try_into()?) 
- } - - fn deserialize_map>(self, visitor: V) -> Result { - visitor.visit_map(self) - } - - fn deserialize_struct>( - self, - name: &str, - _fields: &[&str], - visitor: V, - ) -> Result { - if name == NAME { - visitor.visit_map(self) - } else { - Err(Error::MalformedDocument) - } - } - - forward_to_deserialize_any!( - bool u8 u16 u32 i8 i16 i32 f32 f64 char bytes byte_buf - option unit newtype_struct str string tuple - ignored_any seq unit_struct tuple_struct enum identifier - ); -} - -impl<'de> MapAccess<'de> for DateTimeDeserializer { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result, Error> - where - K: DeserializeSeed<'de>, - { - match self.visited { - false => seed - .deserialize(DateTimeKeyDeserializer::new(FIELD)) - .map(Some), - true => Ok(None), - } - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: DeserializeSeed<'de>, - { - match self.visited { - false => { - self.visited = true; - seed.deserialize(DateTimeFieldDeserializer::new(self.data)) - } - true => Err(Error::MalformedDocument), - } - } -} - -struct DateTimeFieldDeserializer { - data: i64, -} - -impl<'de> DateTimeFieldDeserializer { - fn new(data: i64) -> DateTimeFieldDeserializer { - DateTimeFieldDeserializer { data } - } -} - -impl<'de> Deserializer<'de> for DateTimeFieldDeserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_i64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_i64(self.data) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 f32 f64 char seq - bytes byte_buf str string map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} diff --git a/src/raw/de/js.rs b/src/raw/de/js.rs deleted file mode 100644 index 4f2caf2d..00000000 --- a/src/raw/de/js.rs +++ /dev/null @@ -1,229 +0,0 @@ -use serde::de::{DeserializeSeed, Deserializer, 
MapAccess, SeqAccess, Visitor}; -use serde::forward_to_deserialize_any; - -use crate::raw::Doc; -use super::{BsonDeserializer, Error}; - -pub static NAME: &str = "$__bson_JavaScript"; -pub static WITH_SCOPE_NAME: &str = "$__bson_JavaScriptWithScope"; -pub static DATA_FIELD: &str = "$__bson_javascript_data"; -pub static SCOPE_FIELD: &str = "$__bson_javascript_scope"; -pub static FIELDS: &[&str] = &[DATA_FIELD]; -pub static WITH_SCOPE_FIELDS: &[&str] = &[DATA_FIELD, SCOPE_FIELD]; - -struct JavaScriptKeyDeserializer { - key: &'static str, -} - -impl JavaScriptKeyDeserializer { - fn new(key: &'static str) -> JavaScriptKeyDeserializer { - JavaScriptKeyDeserializer { key } - } -} - -impl<'de> Deserializer<'de> for JavaScriptKeyDeserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_str(self.key) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} - -pub(super) struct JavaScriptWithScopeDeserializer<'de> { - js: &'de str, - scope: &'de Doc, - visiting: ScopedVisiting, -} - -impl<'de> JavaScriptWithScopeDeserializer<'de> { - pub(super) fn new + ?Sized>( - data: (&'de str, &'de D), - ) -> JavaScriptWithScopeDeserializer<'de> { - JavaScriptWithScopeDeserializer { - js: data.0, - scope: data.1.as_ref(), - visiting: ScopedVisiting::Js, - } - } -} - -impl<'de> Deserializer<'de> for JavaScriptWithScopeDeserializer<'de> { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_seq(visitor) - } - - fn deserialize_str(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_borrowed_str(self.js) - } - - fn deserialize_string(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_str(self.js) - } - - fn deserialize_seq(self, 
visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_seq(self) - } - - fn deserialize_tuple(self, ct: usize, visitor: V) -> Result - where - V: Visitor<'de>, - { - if ct != 2 { - Err(Error::MalformedDocument) - } else { - visitor.visit_seq(self) - } - } - - fn deserialize_map>(self, visitor: V) -> Result { - visitor.visit_map(self) - } - - fn deserialize_struct>( - self, - name: &str, - _fields: &[&str], - visitor: V, - ) -> Result { - if name == NAME { - visitor.visit_map(self) - } else { - Err(Error::MalformedDocument) - } - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char bytes byte_buf - option unit newtype_struct - ignored_any unit_struct tuple_struct enum identifier - ); -} - -enum ScopedVisiting { - Js, - Scope, - Done, -} - -impl<'de> SeqAccess<'de> for JavaScriptWithScopeDeserializer<'de> { - type Error = Error; - - fn next_element_seed(&mut self, seed: E) -> Result, Error> - where - E: DeserializeSeed<'de>, - { - match self.visiting { - ScopedVisiting::Js => { - self.visiting = ScopedVisiting::Scope; - seed.deserialize(JavaScriptWithScopeJsDeserializer::new(self.js)) - .map(Some) - } - ScopedVisiting::Scope => { - self.visiting = ScopedVisiting::Done; - seed.deserialize(&mut BsonDeserializer::from_doc(&self.scope)) - .map(Some) - } - ScopedVisiting::Done => Ok(None), - } - } -} - -impl<'de> MapAccess<'de> for JavaScriptWithScopeDeserializer<'de> { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result, Error> - where - K: DeserializeSeed<'de>, - { - match self.visiting { - ScopedVisiting::Js => seed - .deserialize(JavaScriptKeyDeserializer::new(DATA_FIELD)) - .map(Some), - ScopedVisiting::Scope => seed - .deserialize(JavaScriptKeyDeserializer::new(SCOPE_FIELD)) - .map(Some), - ScopedVisiting::Done => Ok(None), - } - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: DeserializeSeed<'de>, - { - match self.visiting { - ScopedVisiting::Js => { - self.visiting = 
ScopedVisiting::Scope; - seed.deserialize(JavaScriptWithScopeJsDeserializer::new(self.js)) - } - ScopedVisiting::Scope => { - self.visiting = ScopedVisiting::Done; - seed.deserialize(&mut BsonDeserializer::from_doc(self.scope)) - } - ScopedVisiting::Done => Err(Error::MalformedDocument), - } - } -} - -struct JavaScriptWithScopeJsDeserializer<'de> { - data: &'de str, -} - -impl<'de> JavaScriptWithScopeJsDeserializer<'de> { - fn new(data: &'de str) -> JavaScriptWithScopeJsDeserializer<'de> { - JavaScriptWithScopeJsDeserializer { data } - } -} - -impl<'de> Deserializer<'de> for JavaScriptWithScopeJsDeserializer<'de> { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_borrowed_str(self.data) - } - - fn deserialize_str(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_borrowed_str(self.data) - } - - fn deserialize_string(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_str(self.data) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} diff --git a/src/raw/de/object_id.rs b/src/raw/de/object_id.rs deleted file mode 100644 index 03e10ed2..00000000 --- a/src/raw/de/object_id.rs +++ /dev/null @@ -1,146 +0,0 @@ -// ObjectId handling - -use serde::de::{DeserializeSeed, Deserializer, MapAccess, Visitor}; -use serde::forward_to_deserialize_any; - -use super::Error; -use crate::raw::elem::Element; -use crate::spec::ElementType; - -pub static FIELD: &str = "$oid"; -pub static FIELDS: &[&str] = &[FIELD]; -pub static NAME: &str = "$__bson_ObjectId"; - -pub struct RawObjectIdDeserializer<'de> { - bson: Element<'de>, - visited: bool, -} - -impl<'de> RawObjectIdDeserializer<'de> { - pub fn new(bson: Element<'de>) -> RawObjectIdDeserializer<'de> { - RawObjectIdDeserializer { - bson, - visited: false, - } - 
} -} - -impl<'de> Deserializer<'de> for RawObjectIdDeserializer<'de> { - type Error = Error; - - fn deserialize_any>(self, visitor: V) -> Result { - match self.bson.element_type() { - ElementType::ObjectId => self.deserialize_struct(NAME, FIELDS, visitor), - _ => Err(Error::MalformedDocument), - } - } - - fn deserialize_bytes>(self, visitor: V) -> Result { - match self.bson.element_type() { - ElementType::ObjectId => visitor.visit_bytes(self.bson.as_bytes()), - _ => Err(Error::MalformedDocument), - } - } - - fn deserialize_map>(self, visitor: V) -> Result { - visitor.visit_map(self) - } - - fn deserialize_struct>( - self, - name: &'static str, - fields: &'static [&'static str], - visitor: V, - ) -> Result { - if name == NAME && fields == FIELDS { - visitor.visit_map(self) - } else { - Err(Error::MalformedDocument) - } - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - byte_buf option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} - -impl<'de> MapAccess<'de> for RawObjectIdDeserializer<'de> { - type Error = Error; - - fn next_key_seed( - &mut self, - seed: K, - ) -> Result>::Value>, Self::Error> - where - K: DeserializeSeed<'de>, - { - if self.visited { - Ok(None) - } else { - self.visited = true; - seed.deserialize(ObjectIdKeyDeserializer).map(Some) - } - } - - fn next_value_seed( - &mut self, - seed: V, - ) -> Result<>::Value, Self::Error> - where - V: DeserializeSeed<'de>, - { - seed.deserialize(ObjectIdValueDeserializer::new(self.bson)) - } -} - -pub(crate) struct ObjectIdKeyDeserializer; - -impl<'de> Deserializer<'de> for ObjectIdKeyDeserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_borrowed_str(FIELD) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct 
tuple_struct tuple enum identifier - ); -} - -struct ObjectIdValueDeserializer<'de>(Element<'de>); - -impl<'de> ObjectIdValueDeserializer<'de> { - fn new(bson: Element<'de>) -> ObjectIdValueDeserializer<'de> { - ObjectIdValueDeserializer(bson) - } -} - -impl<'de> Deserializer<'de> for ObjectIdValueDeserializer<'de> { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match self.0.element_type() { - ElementType::ObjectId => { - let hex = self.0.as_object_id()?.to_hex(); - visitor.visit_string(hex) - } - _ => Err(Error::MalformedDocument), - } - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} diff --git a/src/raw/de/regex.rs b/src/raw/de/regex.rs deleted file mode 100644 index 5ae74258..00000000 --- a/src/raw/de/regex.rs +++ /dev/null @@ -1,209 +0,0 @@ -use serde::de::{DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}; -use serde::forward_to_deserialize_any; - -use super::Error; -use crate::raw::elem::RawBsonRegex; - -pub static NAME: &str = "$__bson_Regex"; -pub static REGEXP_FIELD: &str = "$__bson_regexp_regexp"; -pub static OPTIONS_FIELD: &str = "$__bson_regexp_options"; -pub static FIELDS: &[&str] = &[REGEXP_FIELD, OPTIONS_FIELD]; - -struct RegexKeyDeserializer { - key: &'static str, -} - -impl RegexKeyDeserializer { - fn new(key: &'static str) -> RegexKeyDeserializer { - RegexKeyDeserializer { key } - } -} - -impl<'de> Deserializer<'de> for RegexKeyDeserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_str(self.key) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} - 
-pub(super) struct RegexDeserializer<'de> { - data: RawBsonRegex<'de>, - visiting: Visiting, -} - -impl<'de> RegexDeserializer<'de> { - pub(super) fn new(data: RawBsonRegex<'de>) -> RegexDeserializer<'de> { - RegexDeserializer { - data, - visiting: Visiting::Regex, - } - } -} - -impl<'de> Deserializer<'de> for RegexDeserializer<'de> { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_seq(visitor) - } - - fn deserialize_seq(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_seq(self) - } - - fn deserialize_tuple(self, ct: usize, visitor: V) -> Result - where - V: Visitor<'de>, - { - if ct == 2 { - visitor.visit_seq(self) - } else { - Err(Error::MalformedDocument) - } - } - - fn deserialize_map>(self, visitor: V) -> Result { - visitor.visit_map(self) - } - - fn deserialize_struct>( - self, - name: &str, - _fields: &[&str], - visitor: V, - ) -> Result { - if name == NAME { - visitor.visit_map(self) - } else { - Err(Error::MalformedDocument) - } - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char bytes byte_buf - option unit newtype_struct str string - ignored_any unit_struct tuple_struct enum identifier - ); -} - -enum Visiting { - Regex, - Options, - Done, -} - -impl<'de> SeqAccess<'de> for RegexDeserializer<'de> { - type Error = Error; - - fn next_element_seed(&mut self, seed: E) -> Result, Error> - where - E: DeserializeSeed<'de>, - { - match self.visiting { - Visiting::Regex => { - self.visiting = Visiting::Options; - seed.deserialize(RegexFieldDeserializer::new(self.data.pattern())) - .map(Some) - } - Visiting::Options => { - self.visiting = Visiting::Done; - seed.deserialize(RegexFieldDeserializer::new(self.data.options())) - .map(Some) - } - Visiting::Done => Ok(None), - } - } -} - -impl<'de> MapAccess<'de> for RegexDeserializer<'de> { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result, Error> - where - K: 
DeserializeSeed<'de>, - { - match self.visiting { - Visiting::Regex => seed - .deserialize(RegexKeyDeserializer::new(REGEXP_FIELD)) - .map(Some), - Visiting::Options => seed - .deserialize(RegexKeyDeserializer::new(OPTIONS_FIELD)) - .map(Some), - Visiting::Done => Ok(None), - } - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: DeserializeSeed<'de>, - { - match self.visiting { - Visiting::Regex => { - self.visiting = Visiting::Options; - seed.deserialize(RegexFieldDeserializer::new(self.data.pattern())) - } - Visiting::Options => { - self.visiting = Visiting::Done; - seed.deserialize(RegexFieldDeserializer::new(self.data.options())) - } - Visiting::Done => Err(Error::MalformedDocument), - } - } -} - -struct RegexFieldDeserializer<'de> { - data: &'de str, -} - -impl<'de> RegexFieldDeserializer<'de> { - fn new(data: &'de str) -> RegexFieldDeserializer<'de> { - RegexFieldDeserializer { data } - } -} - -impl<'de> Deserializer<'de> for RegexFieldDeserializer<'de> { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_borrowed_str(self.data) - } - - fn deserialize_str(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_borrowed_str(self.data) - } - - fn deserialize_string(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_str(self.data) - } - - forward_to_deserialize_any!( - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char seq - bytes byte_buf map struct option unit newtype_struct - ignored_any unit_struct tuple_struct tuple enum identifier - ); -} diff --git a/src/raw/elem.rs b/src/raw/elem.rs index e38ebdee..64221936 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -3,38 +3,50 @@ use std::{ time::Duration, }; -use crate::oid; -pub use crate::spec::{BinarySubtype, ElementType}; use chrono::{DateTime, TimeZone, Utc}; #[cfg(feature = "decimal128")] use super::d128_from_slice; use super::{ - i32_from_slice, i64_from_slice, 
read_lenencoded, read_nullterminated, u32_from_slice, Array, - Doc, RawError, RawResult, + i32_from_slice, + i64_from_slice, + read_lenencoded, + read_nullterminated, + u32_from_slice, + RawArray, + RawDocumentRef, + RawError, + RawResult, +}; +use crate::{ + oid::ObjectId, + spec::{BinarySubtype, ElementType}, + Bson, }; +/// A BSON value referencing raw bytes stored elsewhere. #[derive(Clone, Copy, Debug)] -pub struct Element<'a> { +pub struct RawBson<'a> { element_type: ElementType, data: &'a [u8], } -impl<'a> Element<'a> { - // This is not public. An Element object can only be created by iterating over a bson document method - // on RawBsonDoc - pub(super) fn new(element_type: ElementType, data: &'a [u8]) -> Element<'a> { - Element { element_type, data } +impl<'a> RawBson<'a> { + pub(super) fn new(element_type: ElementType, data: &'a [u8]) -> RawBson<'a> { + RawBson { element_type, data } } + /// Gets the type of the value. pub fn element_type(self) -> ElementType { self.element_type } + /// Gets a reference to the raw bytes of the value. pub fn as_bytes(self) -> &'a [u8] { self.data } + /// Gets the f64 that's referenced or returns an error if the value isn't a BSON double. pub fn as_f64(self) -> RawResult { if let ElementType::Double = self.element_type { Ok(f64::from_bits(u64::from_le_bytes( @@ -47,6 +59,7 @@ impl<'a> Element<'a> { } } + /// Gets the string that's referenced or returns an error if the value isn't a BSON string. pub fn as_str(self) -> RawResult<&'a str> { if let ElementType::String = self.element_type { read_lenencoded(self.data) @@ -55,23 +68,26 @@ impl<'a> Element<'a> { } } - pub fn as_document(self) -> RawResult<&'a Doc> { + /// Gets the document that's referenced or returns an error if the value isn't a BSON document. 
+ pub fn as_document(self) -> RawResult<&'a RawDocumentRef> { if let ElementType::EmbeddedDocument = self.element_type { - Doc::new(self.data) + RawDocumentRef::new(self.data) } else { Err(RawError::UnexpectedType) } } - pub fn as_array(self) -> RawResult<&'a Array> { + /// Gets the array that's referenced or returns an error if the value isn't a BSON array. + pub fn as_array(self) -> RawResult<&'a RawArray> { if let ElementType::Array = self.element_type { - Array::new(self.data) + RawArray::new(self.data) } else { Err(RawError::UnexpectedType) } } - pub fn as_binary(self) -> RawResult> { + /// Gets the BSON binary value that's referenced or returns an error if the value isn't a BSON binary. + pub fn as_binary(self) -> RawResult> { if let ElementType::Binary = self.element_type { let length = i32_from_slice(&self.data[0..4]); let subtype = BinarySubtype::from(self.data[4]); // TODO: This mishandles reserved values @@ -97,15 +113,16 @@ impl<'a> Element<'a> { } _ => &self.data[5..], }; - Ok(RawBsonBinary::new(subtype, data)) + Ok(RawBinary::new(subtype, data)) } else { Err(RawError::UnexpectedType) } } - pub fn as_object_id(self) -> RawResult { + /// Gets the ObjectId that's referenced or returns an error if the value isn't a BSON ObjectId. + pub fn as_object_id(self) -> RawResult { if let ElementType::ObjectId = self.element_type { - Ok(oid::ObjectId::with_bytes(self.data.try_into().map_err( + Ok(ObjectId::with_bytes(self.data.try_into().map_err( |_| RawError::MalformedValue("object id should be 12 bytes long".into()), )?)) } else { @@ -113,6 +130,7 @@ impl<'a> Element<'a> { } } + /// Gets the boolean that's referenced or returns an error if the value isn't a BSON boolean. pub fn as_bool(self) -> RawResult { if let ElementType::Boolean = self.element_type { if self.data.len() != 1 { @@ -131,6 +149,7 @@ impl<'a> Element<'a> { } } + /// Gets the DateTime that's referenced or returns an error if the value isn't a BSON DateTime.
pub fn as_datetime(self) -> RawResult> { if let ElementType::DateTime = self.element_type { let millis = i64_from_slice(self.data); @@ -156,22 +175,17 @@ impl<'a> Element<'a> { } } - pub fn as_null(self) -> RawResult<()> { - if let ElementType::Null = self.element_type { - Ok(()) - } else { - Err(RawError::UnexpectedType) - } - } - - pub fn as_regex(self) -> RawResult> { + /// Gets the regex that's referenced or returns an error if the value isn't a BSON regex. + pub fn as_regex(self) -> RawResult> { if let ElementType::RegularExpression = self.element_type { - RawBsonRegex::new(self.data) + RawRegex::new(self.data) } else { Err(RawError::UnexpectedType) } } + /// Gets the BSON JavaScript code that's referenced or returns an error if the value isn't BSON + /// JavaScript code. pub fn as_javascript(self) -> RawResult<&'a str> { if let ElementType::JavaScriptCode = self.element_type { read_lenencoded(self.data) @@ -180,6 +194,7 @@ impl<'a> Element<'a> { } } + /// Gets the symbol that's referenced or returns an error if the value isn't a BSON symbol. pub fn as_symbol(self) -> RawResult<&'a str> { if let ElementType::Symbol = self.element_type { read_lenencoded(self.data) @@ -188,13 +203,15 @@ impl<'a> Element<'a> { } } - pub fn as_javascript_with_scope(self) -> RawResult<(&'a str, &'a Doc)> { + /// Gets the BSON JavaScript code with scope that's referenced or returns an error if the value + /// isn't BSON JavaScript code with scope. 
+ pub fn as_javascript_with_scope(self) -> RawResult<(&'a str, &'a RawDocumentRef)> { if let ElementType::JavaScriptCodeWithScope = self.element_type { let length = i32_from_slice(&self.data[..4]); assert_eq!(self.data.len() as i32, length); let js = read_lenencoded(&self.data[4..])?; - let doc = Doc::new(&self.data[9 + js.len()..])?; + let doc = RawDocumentRef::new(&self.data[9 + js.len()..])?; Ok((js, doc)) } else { @@ -202,24 +219,28 @@ impl<'a> Element<'a> { } } - pub fn as_i32(self) -> RawResult { - if let ElementType::Int32 = self.element_type { - assert_eq!(self.data.len(), 4); - Ok(i32_from_slice(self.data)) + /// Gets the timestamp that's referenced or returns an error if the value isn't a BSON + /// timestamp. + pub fn as_timestamp(self) -> RawResult> { + if let ElementType::Timestamp = self.element_type { + assert_eq!(self.data.len(), 8); + Ok(RawTimestamp { data: self.data }) } else { Err(RawError::UnexpectedType) } } - pub fn as_timestamp(self) -> RawResult> { - if let ElementType::Timestamp = self.element_type { - assert_eq!(self.data.len(), 8); - Ok(RawBsonTimestamp { data: self.data }) + /// Gets the i32 that's referenced or returns an error if the value isn't a BSON int32. + pub fn as_i32(self) -> RawResult { + if let ElementType::Int32 = self.element_type { + assert_eq!(self.data.len(), 4); + Ok(i32_from_slice(self.data)) } else { Err(RawError::UnexpectedType) } } + /// Gets the i64 that's referenced or returns an error if the value isn't a BSON int64. pub fn as_i64(self) -> RawResult { if let ElementType::Int64 = self.element_type { assert_eq!(self.data.len(), 8); @@ -229,8 +250,9 @@ impl<'a> Element<'a> { } } + /// Gets the decimal that's referenced or returns an error if the value isn't a BSON Decimal128. 
#[cfg(feature = "decimal128")] - pub fn as_decimal128(self) -> RawResult { + pub fn as_decimal128(self) -> RawResult { if let ElementType::Decimal128 = self.element_type { assert_eq!(self.data.len(), 16); Ok(d128_from_slice(self.data)) @@ -240,66 +262,66 @@ impl<'a> Element<'a> { } } -impl<'a> TryFrom> for crate::Bson { +impl<'a> TryFrom> for Bson { type Error = RawError; - fn try_from(rawbson: Element<'a>) -> RawResult { + fn try_from(rawbson: RawBson<'a>) -> RawResult { Ok(match rawbson.element_type { - ElementType::Double => crate::Bson::Double(rawbson.as_f64()?), - ElementType::String => crate::Bson::String(String::from(rawbson.as_str()?)), + ElementType::Double => Bson::Double(rawbson.as_f64()?), + ElementType::String => Bson::String(String::from(rawbson.as_str()?)), ElementType::EmbeddedDocument => { let rawdoc = rawbson.as_document()?; let doc = rawdoc.try_into()?; - crate::Bson::Document(doc) + Bson::Document(doc) } ElementType::Array => { let rawarray = rawbson.as_array()?; let v = rawarray.try_into()?; - crate::Bson::Array(v) + Bson::Array(v) } ElementType::Binary => { - let RawBsonBinary { subtype, data } = rawbson.as_binary()?; - crate::Bson::Binary(crate::Binary { + let RawBinary { subtype, data } = rawbson.as_binary()?; + Bson::Binary(crate::Binary { subtype, bytes: data.to_vec(), }) } - ElementType::ObjectId => crate::Bson::ObjectId(rawbson.as_object_id()?), - ElementType::Boolean => crate::Bson::Boolean(rawbson.as_bool()?), - ElementType::DateTime => crate::Bson::DateTime(rawbson.as_datetime()?), - ElementType::Null => crate::Bson::Null, + ElementType::ObjectId => Bson::ObjectId(rawbson.as_object_id()?), + ElementType::Boolean => Bson::Boolean(rawbson.as_bool()?), + ElementType::DateTime => Bson::DateTime(rawbson.as_datetime()?), + ElementType::Null => Bson::Null, ElementType::RegularExpression => { let rawregex = rawbson.as_regex()?; - crate::Bson::RegularExpression(crate::Regex { + Bson::RegularExpression(crate::Regex { pattern: 
String::from(rawregex.pattern()), options: String::from(rawregex.options()), }) } ElementType::JavaScriptCode => { - crate::Bson::JavaScriptCode(String::from(rawbson.as_javascript()?)) + Bson::JavaScriptCode(String::from(rawbson.as_javascript()?)) } - ElementType::Int32 => crate::Bson::Int32(rawbson.as_i32()?), + ElementType::Int32 => Bson::Int32(rawbson.as_i32()?), ElementType::Timestamp => { - // RawBson::as_timestamp() returns u64, but crate::Bson::Timestamp expects i64 + // RawBson::as_timestamp() returns u64, but Bson::Timestamp expects i64 let ts = rawbson.as_timestamp()?; - crate::Bson::Timestamp(crate::Timestamp { + Bson::Timestamp(crate::Timestamp { time: ts.time(), increment: ts.increment(), }) } - ElementType::Int64 => crate::Bson::Int64(rawbson.as_i64()?), - ElementType::Undefined => crate::Bson::Null, + ElementType::Int64 => Bson::Int64(rawbson.as_i64()?), + ElementType::Undefined => Bson::Null, ElementType::DbPointer => panic!("Uh oh. Maybe this should be a TryFrom"), - ElementType::Symbol => crate::Bson::Symbol(String::from(rawbson.as_symbol()?)), + ElementType::Symbol => Bson::Symbol(String::from(rawbson.as_symbol()?)), ElementType::JavaScriptCodeWithScope => { let (js, scope) = rawbson.as_javascript_with_scope()?; - crate::Bson::JavaScriptCodeWithScope(crate::JavaScriptCodeWithScope { + Bson::JavaScriptCodeWithScope(crate::JavaScriptCodeWithScope { code: String::from(js), scope: scope.try_into()?, }) } #[cfg(feature = "decimal128")] - ElementType::Decimal128 => crate::Bson::Decimal128(rawbson.as_decimal128()?), + ElementType::Decimal128 => Bson::Decimal128(rawbson.as_decimal128()?), #[cfg(not(feature = "decimal128"))] ElementType::Decimal128 => return Err(RawError::UnexpectedType), @@ -309,40 +331,42 @@ impl<'a> TryFrom> for crate::Bson { } } +/// A BSON binary value referencing raw bytes stored elsewhere. 
#[derive(Clone, Copy, Debug)] -pub struct RawBsonBinary<'a> { +pub struct RawBinary<'a> { pub(super) subtype: BinarySubtype, pub(super) data: &'a [u8], } -impl<'a> RawBsonBinary<'a> { - pub fn new(subtype: BinarySubtype, data: &'a [u8]) -> RawBsonBinary<'a> { - RawBsonBinary { subtype, data } +impl<'a> RawBinary<'a> { + fn new(subtype: BinarySubtype, data: &'a [u8]) -> RawBinary<'a> { + RawBinary { subtype, data } } - /// Return the BinarySubtype. + /// Gets the subtype of the binary value. pub fn subtype(self) -> BinarySubtype { self.subtype } - /// Return the binary data as raw bytes. + /// Gets the contained bytes of the binary value. pub fn as_bytes(self) -> &'a [u8] { self.data } } +/// A BSON regex referencing raw bytes stored elsewhere. #[derive(Clone, Copy, Debug)] -pub struct RawBsonRegex<'a> { +pub struct RawRegex<'a> { pub(super) pattern: &'a str, pub(super) options: &'a str, } -impl<'a> RawBsonRegex<'a> { - pub fn new(data: &'a [u8]) -> RawResult> { +impl<'a> RawRegex<'a> { + pub fn new(data: &'a [u8]) -> RawResult> { let pattern = read_nullterminated(data)?; let opts = read_nullterminated(&data[pattern.len() + 1..])?; if pattern.len() + opts.len() == data.len() - 2 { - Ok(RawBsonRegex { + Ok(RawRegex { pattern, options: opts, }) @@ -362,21 +386,24 @@ impl<'a> RawBsonRegex<'a> { } } +/// A BSON timestamp referencing raw bytes stored elsewhere. #[derive(Clone, Copy, Debug, PartialEq)] -pub struct RawBsonTimestamp<'a> { +pub struct RawTimestamp<'a> { data: &'a [u8], } -impl<'a> RawBsonTimestamp<'a> { +impl<'a> RawTimestamp<'a> { /// Return the time portion of the timestamp. pub fn time(&self) -> u32 { - // RawBsonTimestamp can only be constructed with the correct data length, so this should always succeed. + // RawBsonTimestamp can only be constructed with the correct data length, so this should + // always succeed. u32_from_slice(&self.data[4..8]) } /// Return the increment portion of the timestamp. 
pub fn increment(&self) -> u32 { - // RawBsonTimestamp can only be constructed with the correct data length, so this should always succeed. + // RawBsonTimestamp can only be constructed with the correct data length, so this should + // always succeed. u32_from_slice(&self.data[0..4]) } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 8f6a1e37..ad842c97 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1,143 +1,110 @@ -/*! -A rawbson document can be created from a `Vec` containing raw BSON data, and elements -accessed via methods similar to those in the [bson-rust](https://crates.io/crate/bson-rust) -crate. Note that rawbson returns a Result>, since the bytes contained in the -document are not fully validated until trying to access the contained data. - -```rust -use bson::raw::{ - DocBuf, - elem, -}; - -// \x13\x00\x00\x00 // total document size -// \x02 // 0x02 = type String -// hi\x00 // field name -// \x06\x00\x00\x00y'all\x00 // field value -// \x00 // document terminating NUL - -let doc = DocBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -let elem: Option = doc.get("hi")?; -assert_eq!( - elem.unwrap().as_str()?, - "y'all", -); -# Ok::<(), bson::raw::RawError>(()) -``` - -### bson-rust interop - -This crate is designed to interoperate smoothly with the bson crate. - -A [`DocBuf`] can be created from a [`bson::document::Document`]. Internally, this -serializes the `Document` to a `Vec`, and then includes those bytes in the [`DocBuf`]. - -```rust -use bson::doc; -use bson::raw::{ - DocBuf, -}; - -let document = doc!{"goodbye": {"cruel": "world"}}; -let raw = DocBuf::from_document(&document); -let value: Option<&str> = raw.get_document("goodbye")? - .map(|doc| doc.get_str("cruel")) - .transpose()? 
- .flatten(); - -assert_eq!( - value, - Some("world"), -); -# Ok::<(), bson::raw::RawError>(()) -``` - -### Reference types - -A BSON document can also be accessed with the [`Doc`] reference type, -which is an unsized type that represents the BSON payload as a `[u8]`. -This allows accessing nested documents without reallocation. [Doc] -must always be accessed via a pointer type, similarly to `[T]` and `str`. - -This type will coexist with the now deprecated [DocRef] type for at -least one minor release. - -The below example constructs a bson document in a stack-based array, -and extracts a &str from it, performing no heap allocation. - -```rust -use bson::raw::Doc; - -let bytes = b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00"; -assert_eq!(Doc::new(bytes)?.get_str("hi")?, Some("y'all")); -# Ok::<(), bson::raw::RawError>(()) -``` - -### Iteration - -[`Doc`] implements [`IntoIterator`](std::iter::IntoIterator), which can also -be accessed via [`DocBuf::iter`]. - -```rust -use bson::doc; -use bson::raw::{DocBuf, elem::Element}; - -let doc = DocBuf::from_document(&doc! {"crate": "rawbson", "license": "MIT"}); -let mut dociter = doc.iter(); - -let (key, value): (&str, Element) = dociter.next().unwrap()?; -assert_eq!(key, "crate"); -assert_eq!(value.as_str()?, "rawbson"); - -let (key, value): (&str, Element) = dociter.next().unwrap()?; -assert_eq!(key, "license"); -assert_eq!(value.as_str()?, "MIT"); -# Ok::<(), bson::raw::RawError>(()) -``` - -### serde support - -There is also serde deserialization support. - -Serde serialization support is not yet provided. For now, use -[`bson::to_document`] instead, and then serialize it out using -[`bson::Document::to_writer`] or [`DocBuf::from_document`]. 
- -```rust -use serde::Deserialize; -use bson::{doc, Document, oid::ObjectId, DateTime}; -use bson::raw::{DocBuf, de::from_docbuf}; - -#[derive(Deserialize)] -#[serde(rename_all="camelCase")] -struct User { - #[serde(rename = "_id")] - id: ObjectId, - first_name: String, - last_name: String, - birthdate: Option>, - #[serde(flatten)] - extra: Document, -} - -let doc = DocBuf::from_document(&doc!{ - "_id": ObjectId::with_string("543254325432543254325432")?, - "firstName": "John", - "lastName": "Doe", - "birthdate": null, - "luckyNumbers": [3, 60, 2147483647], - "nickname": "Red", -}); - -let user: User = from_docbuf(&doc)?; -assert_eq!(user.id.to_hex(), "543254325432543254325432"); -assert_eq!(user.first_name, "John"); -assert_eq!(user.last_name, "Doe"); -assert_eq!(user.extra.get_str("nickname")?, "Red"); -assert!(user.birthdate.is_none()); -# Ok::<(), Box>(()) -``` -*/ +//! A RawDocument can be created from a `Vec<u8>` containing raw BSON data, and elements +//! accessed via methods similar to those available on the Document type. Note that rawbson returns +//! a RawResult<Option<T>>, since the bytes contained in the document are not fully validated until +//! trying to access the contained data. +//! +//! ```rust +//! use bson::raw::{ +//! RawBson, +//! RawDocument, +//! }; +//! +//! // See http://bsonspec.org/spec.html for details on the binary encoding of BSON. +//! let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +//! let elem: Option<RawBson> = doc.get("hi")?; +//! +//! assert_eq!( +//! elem.unwrap().as_str()?, +//! "y'all", +//! ); +//! # Ok::<(), bson::raw::RawError>(()) +//! ``` +//! +//! ### bson-rust interop +//! +//! A [`RawDocument`] can be created from a [`bson::document::Document`]. Internally, this +//! serializes the `Document` to a `Vec<u8>`, and then includes those bytes in the [`RawDocument`]. +//! +//! ```rust +//! use bson::{ +//! raw::RawDocument, +//! doc, +//! }; +//! +//! let document = doc! { +//! "goodbye": { +//!
"cruel": "world" +//! } +//! }; + +//! let raw = RawDocument::from_document(&document); +//! let value: Option<&str> = raw +//! .get_document("goodbye")? +//! .map(|doc| doc.get_str("cruel")) +//! .transpose()? +//! .flatten(); +//! +//! assert_eq!( +//! value, +//! Some("world"), +//! ); +//! # Ok::<(), bson::raw::RawError>(()) +//! ``` +//! +//! ### Reference types +//! +//! A BSON document can also be accessed with the [`RawDocumentRef`] reference type, which is an +//! unsized type that represents the BSON payload as a `[u8]`. This allows accessing nested +//! documents without reallocation. [RawDocumentRef] must always be accessed via a pointer type, +//! similarly to `[T]` and `str`. +//! +//! The below example constructs a bson document in a stack-based array, +//! and extracts a &str from it, performing no heap allocation. + +//! ```rust +//! use bson::raw::RawDocumentRef; +//! +//! let bytes = b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00"; +//! assert_eq!(RawDocumentRef::new(bytes)?.get_str("hi")?, Some("y'all")); +//! # Ok::<(), bson::raw::RawError>(()) +//! ``` +//! +//! ### Iteration +//! +//! [`RawDocumentRef`] implements [`IntoIterator`](std::iter::IntoIterator), which can also be +//! accessed via [`RawDocument::iter`]. + +//! ```rust +//! // Iterate over the key/value pairs of a raw document. +//! use bson::{ +//! raw::{ +//! RawBson, +//! RawDocument, +//! }, +//! doc, +//! }; +//! +//! let original_doc = doc! { +//! "crate": "bson", +//! "year": "2021", +//! }; +//! +//! let doc = RawDocument::from_document(&original_doc); +//! let mut doc_iter = doc.iter(); +//! +//! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?; +//! assert_eq!(key, "crate"); +//! assert_eq!(value.as_str()?, "bson"); +//! +//! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?; +//! assert_eq!(key, "year"); +//! assert_eq!(value.as_str()?, "2021"); +//! # Ok::<(), bson::raw::RawError>(()) +//!
``` + +mod elem; +#[cfg(test)] +mod props; use std::{ borrow::Borrow, @@ -149,37 +116,29 @@ use chrono::{DateTime, Utc}; #[cfg(feature = "decimal128")] use crate::decimal128::Decimal128; +use crate::{oid::ObjectId, spec::ElementType, Bson, Document}; +pub use elem::{RawBinary, RawBson, RawRegex, RawTimestamp}; -use crate::{document::ValueAccessError, oid, spec::ElementType, Bson}; - -pub mod de; -pub mod elem; - -#[cfg(test)] -mod props; - -/// Error to indicate that either a value was empty or it contained an unexpected -/// type, for use with the direct getters. +/// An error that occurs when attempting to parse raw BSON bytes. #[derive(Debug, PartialEq)] pub enum RawError { - /// Found a Bson value with the specified key, but not with the expected type + /// A BSON value did not fit the expected type. UnexpectedType, - /// The found value was not well-formed + /// A BSON value did not fit the proper format. MalformedValue(String), - /// Found a value where a utf-8 string was expected, but it was not valid - /// utf-8. The error value contains the malformed data as a string. + /// Improper UTF-8 bytes were found when proper UTF-8 was expected. The error value contains + /// the malformed data as bytes. 
Utf8EncodingError(Vec), } impl std::fmt::Display for RawError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - use RawError::*; match self { - UnexpectedType => write!(f, "unexpected type"), - MalformedValue(s) => write!(f, "malformed value: {:?}", s), - Utf8EncodingError(_) => write!(f, "utf-8 encoding error"), + Self::UnexpectedType => write!(f, "unexpected type"), + Self::MalformedValue(s) => write!(f, "malformed value: {:?}", s), + Self::Utf8EncodingError(_) => write!(f, "utf-8 encoding error"), } } } @@ -187,41 +146,21 @@ impl std::fmt::Display for RawError { impl std::error::Error for RawError {} pub type RawResult = Result; -type OptResult = RawResult>; - -impl<'a> From for ValueAccessError { - fn from(src: RawError) -> ValueAccessError { - match src { - RawError::UnexpectedType => ValueAccessError::UnexpectedType, - RawError::MalformedValue(_) => ValueAccessError::UnexpectedType, - RawError::Utf8EncodingError(_) => ValueAccessError::UnexpectedType, - } - } -} -impl<'a> From for RawError { - fn from(src: ValueAccessError) -> RawError { - match src { - ValueAccessError::NotPresent => unreachable!("This should be converted to an Option"), - ValueAccessError::UnexpectedType => RawError::UnexpectedType, - } - } -} - -/// A BSON document, stored as raw binary data on the heap. This can be created from -/// a `Vec` or a [`bson::Document`]. +/// A BSON document, stored as raw bytes on the heap. This can be created from a `Vec` or +/// a [`bson::Document`]. /// -/// Accessing elements within the `DocBuf` is similar to element access in [bson::Document], -/// but as the contents are parsed during iteration, instead of at creation time, format -/// errors can happen at any time during use, instead of at creation time. 
+/// Accessing elements within a `RawDocument` is similar to element access in [bson::Document], but +/// because the contents are parsed during iteration, instead of at creation time, format errors can +/// happen at any time during use. /// -/// DocBuf can be iterated over, yielding a Result containing key-value pairs that -/// borrow from the DocBuf instead of allocating, when necessary. +/// Iterating over a RawDocument yields either an error or a key-value pair that borrows from the +/// original document without making any additional allocations. /// /// ``` -/// # use bson::raw::{DocBuf, RawError}; -/// let docbuf = DocBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -/// let mut iter = docbuf.iter(); +/// # use bson::raw::{RawDocument, RawError}; +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// let mut iter = doc.iter(); /// let (key, value) = iter.next().unwrap()?; /// assert_eq!(key, "hi"); /// assert_eq!(value.as_str(), Ok("y'all")); @@ -229,118 +168,97 @@ impl<'a> From for RawError { /// # Ok::<(), RawError>(()) /// ``` /// -/// Individual elements can be accessed using [`docbuf.get(&key)`](Doc::get), or any of -/// the `get_*` methods, like [`docbuf.get_object_id(&key)`](Doc::get_object_id), and -/// [`docbuf.get_str(&str)`](Doc::get_str). Accessing elements is an O(N) operation, -/// as it requires iterating through the document from the beginning to find the requested -/// key. +/// Individual elements can be accessed using [`RawDocument::get`](RawDocument::get) or any of the +/// type-specific getters, such as [`RawDocument::get_object_id`](RawDocument::get_object_id) or +/// [`RawDocument::get_str`](RawDocument::get_str). Note that accessing elements is an O(N) +/// operation, as it requires iterating through the document from the beginning to find the +/// requested key. 
/// /// ``` -/// # use bson::raw::{DocBuf, RawError}; -/// let docbuf = DocBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -/// assert_eq!(docbuf.get_str("hi")?, Some("y'all")); +/// # use bson::raw::{RawDocument, RawError}; +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// assert_eq!(doc.get_str("hi")?, Some("y'all")); /// # Ok::<(), RawError>(()) /// ``` #[derive(Clone, Debug)] -pub struct DocBuf { +pub struct RawDocument { data: Box<[u8]>, } -impl DocBuf { - /// Create a new `DocBuf` from the provided `Vec`. +impl RawDocument { + /// Constructs a new RawDocument, validating _only_ the + /// following invariants: + /// * `data` is at least five bytes long (the minimum for a valid BSON document) + /// * the initial four bytes of `data` accurately represent the length of the bytes as + /// required by the BSON spec. + /// * the last byte of `data` is a 0 /// - /// The data is checked for a declared length equal to the length of the Vec, - /// and a trailing NUL byte. Other validation is deferred to access time. + /// Note that the internal structure of the bytes representing the + /// BSON elements is _not_ validated at all by this method. If the + /// bytes do not conform to the BSON spec, then method calls on + /// the RawDocument will return RawErrors where appropriate. 
/// /// ``` - /// # use bson::raw::{DocBuf, RawError}; - /// let docbuf: DocBuf = DocBuf::new(b"\x05\0\0\0\0".to_vec())?; + /// # use bson::raw::{RawDocument, RawError}; + /// let doc = RawDocument::new(b"\x05\0\0\0\0".to_vec())?; /// # Ok::<(), RawError>(()) /// ``` - pub fn new(data: Vec) -> RawResult { + pub fn new(data: Vec) -> RawResult { if data.len() < 5 { return Err(RawError::MalformedValue("document too short".into())); } + let length = i32_from_slice(&data[..4]); + if data.len() as i32 != length { return Err(RawError::MalformedValue("document length incorrect".into())); } + if data[data.len() - 1] != 0 { return Err(RawError::MalformedValue( "document not null-terminated".into(), )); } - Ok(unsafe { DocBuf::new_unchecked(data) }) + + Ok(Self { + data: data.into_boxed_slice(), + }) } - /// Create a DocBuf from a [bson::Document]. + /// Create a RawDocument from a Document. /// /// ``` - /// # use bson::raw::{DocBuf, RawError}; - /// use bson::{doc, oid}; + /// # use bson::raw::{RawDocument, RawError}; + /// use bson::{doc, oid::ObjectId}; + /// /// let document = doc! { - /// "_id": oid::ObjectId::new(), + /// "_id": ObjectId::new(), /// "name": "Herman Melville", /// "title": "Moby-Dick", /// }; - /// let docbuf: DocBuf = DocBuf::from_document(&document); + /// let doc = RawDocument::from_document(&document); /// # Ok::<(), RawError>(()) /// ``` - pub fn from_document(doc: &crate::Document) -> DocBuf { + pub fn from_document(doc: &Document) -> RawDocument { let mut data = Vec::new(); doc.to_writer(&mut data).unwrap(); - unsafe { DocBuf::new_unchecked(data) } - } - /// Create a DocBuf from an owned Vec without performing any checks on the provided data. - /// - /// ``` - /// # use bson::raw::{DocBuf, RawError}; - /// let docbuf: DocBuf = unsafe { - /// DocBuf::new_unchecked(b"\x05\0\0\0\0".to_vec()) - /// }; - /// # Ok::<(), RawError>(()) - /// ``` - /// - /// # Safety - /// - /// The provided bytes must have a valid length marker, and be NUL terminated. 
- pub unsafe fn new_unchecked(data: Vec) -> DocBuf { - DocBuf { + Self { data: data.into_boxed_slice(), } } - /// Return a [`&Doc`](Doc) borrowing from the data contained in self. - /// - /// # Deprecation - /// - /// DocRef is now a deprecated type alias for [Doc]. DocBuf can - /// dereference to &Doc directly, or be converted using [AsRef::as_ref], - /// so this function is unnecessary. + /// Gets an iterator over the elements in the `RawDocument`, which yields `Result<&str, + /// Element<'_>>`. /// /// ``` - /// # use bson::raw::{DocBuf, DocRef, RawError}; - /// let docbuf = DocBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; - /// let docref: DocRef = docbuf.as_docref(); - /// # Ok::<(), RawError>(()) - /// ``` - #[deprecated(since = "0.2.0", note = "use docbuf.as_ref() instead")] - pub fn as_docref(&self) -> &Doc { - self.as_ref() - } - - /// Return an iterator over the elements in the `DocBuf`, borrowing data. + /// # use bson::raw::{elem, RawDocument, RawError}; + /// use bson::doc; /// - /// The associated item type is `Result<&str, Element<'_>>`. An error is - /// returned if data is malformed. + /// let doc = RawDocument::from_document(&doc! { "ferris": true }); /// - /// ``` - /// # use bson::raw::{elem, DocBuf, RawError}; - /// use bson::doc; - /// let docbuf = DocBuf::from_document(&doc! { "ferris": true }); - /// for element in docbuf.iter() { - /// let (key, value): (&str, elem::Element) = element?; + /// for element in doc.iter() { + /// let (key, value) = element?; /// assert_eq!(key, "ferris"); /// assert_eq!(value.as_bool()?, true); /// } @@ -349,81 +267,81 @@ impl DocBuf { /// /// # Note: /// - /// There is no owning iterator for DocBuf. If you need ownership over + /// There is no owning iterator for RawDocument. If you need ownership over /// elements that might need to allocate, you must explicitly convert /// them to owned types yourself. 
- pub fn iter(&self) -> DocIter<'_> { + pub fn iter(&self) -> RawDocumentIter<'_> { self.into_iter() } /// Return the contained data as a `Vec` /// /// ``` - /// # use bson::raw::DocBuf; + /// # use bson::raw::RawDocument; /// use bson::doc; - /// let docbuf = DocBuf::from_document(&doc!{}); - /// assert_eq!(docbuf.into_inner(), b"\x05\x00\x00\x00\x00".to_vec()); + /// + /// let doc = RawDocument::from_document(&doc!{}); + /// assert_eq!(doc.into_inner(), b"\x05\x00\x00\x00\x00".to_vec()); /// ``` pub fn into_inner(self) -> Vec { self.data.to_vec() } } -impl TryFrom for crate::Document { +impl TryFrom for Document { type Error = RawError; - fn try_from(rawdoc: DocBuf) -> RawResult { - crate::Document::try_from(rawdoc.as_ref()) + fn try_from(raw: RawDocument) -> RawResult { + Document::try_from(raw.as_ref()) } } -impl<'a> IntoIterator for &'a DocBuf { - type IntoIter = DocIter<'a>; - type Item = RawResult<(&'a str, elem::Element<'a>)>; +impl<'a> IntoIterator for &'a RawDocument { + type IntoIter = RawDocumentIter<'a>; + type Item = RawResult<(&'a str, RawBson<'a>)>; - fn into_iter(self) -> DocIter<'a> { - DocIter { + fn into_iter(self) -> RawDocumentIter<'a> { + RawDocumentIter { doc: &self, offset: 4, } } } -impl AsRef for DocBuf { - fn as_ref(&self) -> &Doc { - // SAFETY: Constructing the DocBuf checks the envelope validity of the BSON document. - unsafe { Doc::new_unchecked(&self.data) } +impl AsRef for RawDocument { + fn as_ref(&self) -> &RawDocumentRef { + RawDocumentRef::new_unchecked(&self.data) } } -impl Borrow for DocBuf { - fn borrow(&self) -> &Doc { +impl Borrow for RawDocument { + fn borrow(&self) -> &RawDocumentRef { &*self } } -impl ToOwned for Doc { - type Owned = DocBuf; +impl ToOwned for RawDocumentRef { + type Owned = RawDocument; fn to_owned(&self) -> Self::Owned { - self.to_docbuf() + self.to_raw_document() } } -/// A BSON document, referencing raw binary data stored elsewhere. 
This can be created from -/// a [DocBuf] or any type that contains valid BSON data, and can be referenced as a `[u8]`, +/// A BSON document referencing raw bytes stored elsewhere. This can be created from a +/// [RawDocument] or any type that contains valid BSON data, and can be referenced as a `[u8]`, /// including static binary literals, [Vec](std::vec::Vec), or arrays. /// -/// Accessing elements within the `Doc` is similar to element access in [bson::Document], -/// but as the contents are parsed during iteration, instead of at creation time, format -/// errors can happen at any time during use, instead of at creation time. -/// -/// Doc can be iterated over, yielding a Result containing key-value pairs that share the -/// borrow with the source bytes instead of allocating, when necessary. +/// Accessing elements within a `RawDocumentRef` is similar to element access in [bson::Document], +/// but because the contents are parsed during iteration, instead of at creation time, format errors +/// can happen at any time during use. /// +/// Iterating over a RawDocumentRef yields either an error or a key-value pair that borrows from the +/// original document without making any additional allocations. + /// ``` /// # use bson::raw::{Doc, RawError}; -/// let doc = Doc::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; +/// let doc = RawDocumentRef::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; /// let mut iter = doc.into_iter(); /// let (key, value) = iter.next().unwrap()?; /// assert_eq!(key, "hi"); @@ -432,97 +350,116 @@ impl ToOwned for Doc { /// # Ok::<(), RawError>(()) /// ``` /// -/// Individual elements can be accessed using [`doc.get(&key)`](Doc::get), or any of -/// the `get_*` methods, like [`doc.get_object_id(&key)`](Doc::get_object_id), and -/// [`doc.get_str(&str)`](Doc::get_str). 
Accessing elements is an O(N) operation, -/// as it requires iterating through the document from the beginning to find the requested -/// key. +/// Individual elements can be accessed using [`RawDocumentRef::get`](RawDocumentRef::get) or any of +/// the type-specific getters, such as +/// [`RawDocumentRef::get_object_id`](RawDocumentRef::get_object_id) or [`RawDocumentRef:: +/// get_str`](RawDocumentRef::get_str). Note that accessing elements is an O(N) operation, as it +/// requires iterating through the document from the beginning to find the requested key. /// /// ``` -/// # use bson::raw::{DocBuf, RawError}; -/// let docbuf = DocBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -/// assert_eq!(docbuf.get_str("hi")?, Some("y'all")); +/// # use bson::raw::{RawDocument, RawError}; +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// assert_eq!(doc.get_str("hi")?, Some("y'all")); /// # Ok::<(), RawError>(()) /// ``` #[derive(Debug)] -pub struct Doc { +pub struct RawDocumentRef { data: [u8], } -impl Doc { - pub fn new + ?Sized>(data: &D) -> RawResult<&Doc> { +impl RawDocumentRef { + /// Constructs a new RawDocumentRef, validating _only_ the + /// following invariants: + /// * `data` is at least five bytes long (the minimum for a valid BSON document) + /// * the initial four bytes of `data` accurately represent the length of the bytes as + /// required by the BSON spec. + /// * the last byte of `data` is a 0 + /// + /// Note that the internal structure of the bytes representing the + /// BSON elements is _not_ validated at all by this method. If the + /// bytes do not conform to the BSON spec, then method calls on + /// the RawDocument will return RawErrors where appropriate. 
+ /// + /// ``` + /// # use bson::raw::{RawDocument, RawError}; + /// let doc = RawDocumentRef::new(b"\x05\0\0\0\0")?; + /// # Ok::<(), RawError>(()) + /// ``` + pub fn new + ?Sized>(data: &D) -> RawResult<&RawDocumentRef> { let data = data.as_ref(); + if data.len() < 5 { return Err(RawError::MalformedValue("document too short".into())); } + let length = i32_from_slice(&data[..4]); + if data.len() as i32 != length { return Err(RawError::MalformedValue("document length incorrect".into())); } + if data[data.len() - 1] != 0 { return Err(RawError::MalformedValue( "document not null-terminated".into(), )); } - Ok(unsafe { Doc::new_unchecked(data) }) + + Ok(RawDocumentRef::new_unchecked(data)) } - /// Create a new Doc referencing the provided data slice. - /// - /// # Safety - /// - /// The provided data must begin with a valid size - /// and end with a NUL-terminator. - /// - /// ``` - /// # use bson::raw::{Doc, RawError}; - /// let doc: &Doc = unsafe { Doc::new_unchecked(b"\x05\0\0\0\0") }; - /// ``` - pub unsafe fn new_unchecked + ?Sized>(data: &D) -> &Doc { - #[allow(unused_unsafe)] - unsafe { - &*(data.as_ref() as *const [u8] as *const Doc) - } + /// Creates a new Doc referencing the provided data slice. + fn new_unchecked + ?Sized>(data: &D) -> &RawDocumentRef { + // SAFETY: + // + // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is + // null, dangling, or misaligned. We know the pointer is not null or dangling due to the + // fact that it's created by a safe reference. Converting &[u8] to *const [u8] will be + // properly aligned due to them being references to the same type, and converting *const + // [u8] to *const RawDocumentRef is aligned due to the fact that the only field in a + // RawDocumentRef is a [u8], meaning the structs are represented identically at the byte + // level. + unsafe { &*(data.as_ref() as *const [u8] as *const RawDocumentRef) } } - /// Create a new DocBuf with an owned copy of the data in self. 
+ /// Creates a new RawDocument with an owned copy of the BSON bytes. /// /// ``` /// # use bson::raw::{Doc, RawError}; - /// use bson::raw::DocBuf; + /// use bson::raw::RawDocument; + /// /// let data = b"\x05\0\0\0\0"; - /// let doc = Doc::new(data)?; - /// let docbuf: DocBuf = doc.to_docbuf(); + /// let doc_ref = RawDocumentRef::new(data)?; + /// let doc: RawDocument = doc_ref.to_raw_document(); /// # Ok::<(), RawError>(()) - pub fn to_docbuf(&self) -> DocBuf { - // SAFETY: The validity of the data is checked by self. - unsafe { DocBuf::new_unchecked(self.data.to_owned()) } + pub fn to_raw_document(&self) -> RawDocument { + RawDocument { + data: self.data.to_owned().into_boxed_slice(), + } } - /// Get an element from the document. Finding a particular key requires - /// iterating over the document from the beginning, so this is an O(N) - /// operation. - /// - /// Returns an error if the document is malformed. Returns `Ok(None)` - /// if the key is not found in the document. + /// Gets a reference to the value corresponding to the given key by iterating until the key is + /// found. /// /// ``` - /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// # /// use bson::{doc, oid::ObjectId}; - /// let docbuf = DocBuf::from_document(&doc! { + /// + /// let doc = RawDocument::from_document(&doc! 
{ /// "_id": ObjectId::new(), /// "f64": 2.5, /// }); - /// let element = docbuf.get("f64")?.expect("finding key f64"); + /// + /// let element = doc.get("f64")?.expect("finding key f64"); /// assert_eq!(element.as_f64(), Ok(2.5)); - /// assert!(docbuf.get("unknown")?.is_none()); + /// assert!(doc.get("unknown")?.is_none()); /// # Ok::<(), RawError>(()) /// ``` - pub fn get<'a>(&'a self, key: &str) -> OptResult> { + pub fn get<'a>(&'a self, key: &str) -> RawResult>> { for result in self.into_iter() { - let (thiskey, bson) = result?; - if thiskey == key { - return Ok(Some(bson)); + let (k, v) = result?; + if key == k { + return Ok(Some(v)); } } Ok(None) @@ -531,406 +468,283 @@ impl Doc { fn get_with<'a, T>( &'a self, key: &str, - f: impl FnOnce(elem::Element<'a>) -> RawResult, - ) -> OptResult { + f: impl FnOnce(elem::RawBson<'a>) -> RawResult, + ) -> RawResult> { self.get(key)?.map(f).transpose() } - /// Get an element from the document, and convert it to f64. - /// - /// Returns an error if the document is malformed, or if the retrieved value - /// is not an f64. Returns `Ok(None)` if the key is not found in the document. + /// Gets a reference to the BSON double value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a double. /// /// ``` - /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, RawError}; /// use bson::doc; - /// let docbuf = DocBuf::from_document(&doc! { + /// + /// let doc = RawDocument::from_document(&doc! 
{ /// "bool": true, /// "f64": 2.5, /// }); - /// assert_eq!(docbuf.get_f64("f64"), Ok(Some(2.5))); - /// assert_eq!(docbuf.get_f64("bool"), Err(RawError::UnexpectedType)); - /// assert_eq!(docbuf.get_f64("unknown"), Ok(None)); + /// + /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); + /// assert_eq!(doc.get_f64("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_f64("unknown"), Ok(None)); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_f64(&self, key: &str) -> OptResult { - self.get_with(key, elem::Element::as_f64) + pub fn get_f64(&self, key: &str) -> RawResult> { + self.get_with(key, elem::RawBson::as_f64) } - /// Get an element from the document, and convert it to a &str. - /// - /// The returned &str is a borrowed reference into the DocBuf. To use it - /// beyond the lifetime of self, call to_docbuf() on it. - /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not a string. Returns `Ok(None)` if the key is not found in the - /// document. + /// Gets a reference to the string value corresponding to a given key or returns an error if the + /// key corresponds to a value which isn't a string. /// /// ``` - /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, RawError}; /// use bson::doc; - /// let docbuf = DocBuf::from_document(&doc! { + /// + /// let doc = RawDocument::from_document(&doc! 
{ /// "string": "hello", /// "bool": true, /// }); - /// assert_eq!(docbuf.get_str("string"), Ok(Some("hello"))); - /// assert_eq!(docbuf.get_str("bool"), Err(RawError::UnexpectedType)); - /// assert_eq!(docbuf.get_str("unknown"), Ok(None)); + /// + /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); + /// assert_eq!(doc.get_str("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_str("unknown"), Ok(None)); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_str<'a>(&'a self, key: &str) -> OptResult<&'a str> { - self.get_with(key, elem::Element::as_str) + pub fn get_str<'a>(&'a self, key: &str) -> RawResult> { + self.get_with(key, elem::RawBson::as_str) } - /// Get an element from the document, and convert it to a [Doc]. - /// - /// The returned [Doc] is a borrowed reference into self. To use it - /// beyond the lifetime of self, call to_owned() on it. - /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not a document. Returns `Ok(None)` if the key is not found in the - /// document. + /// Gets a reference to the document value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a document. /// /// ``` - /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, RawError}; /// use bson::doc; - /// let docbuf = DocBuf::from_document(&doc! { + /// + /// let doc = RawDocument::from_document(&doc! 
{ /// "doc": { "key": "value"}, /// "bool": true, /// }); - /// assert_eq!(docbuf.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); - /// assert_eq!(docbuf.get_document("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(docbuf.get_document("unknown")?.is_none()); + /// + /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); + /// assert_eq!(doc.get_document("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_document("unknown")?.is_none()); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_document<'a>(&'a self, key: &str) -> OptResult<&'a Doc> { - self.get_with(key, elem::Element::as_document) + pub fn get_document<'a>(&'a self, key: &str) -> RawResult> { + self.get_with(key, elem::RawBson::as_document) } - /// Get an element from the document, and convert it to an [ArrayRef]. - /// - /// The returned [ArrayRef] is a borrowed reference into the DocBuf. - /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not a document. Returns `Ok(None)` if the key is not found in the - /// document. + /// Gets a reference to the array value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't an array. /// /// ``` - /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, RawError}; /// use bson::doc; - /// let docbuf = DocBuf::from_document(&doc! { - /// "array": [true, 3, null], + /// + /// let doc = RawDocument::from_document(&doc! 
{ + /// "array": [true, 3], /// "bool": true, /// }); - /// let mut arriter = docbuf.get_array("array")?.expect("finding key array").into_iter(); + /// + /// let mut arr_iter = docbuf.get_array("array")?.expect("finding key array").into_iter(); /// let _: bool = arriter.next().unwrap()?.as_bool()?; /// let _: i32 = arriter.next().unwrap()?.as_i32()?; - /// let () = arriter.next().unwrap()?.as_null()?; - /// assert!(arriter.next().is_none()); - /// assert!(docbuf.get_array("bool").is_err()); - /// assert!(docbuf.get_array("unknown")?.is_none()); + /// + /// assert!(arr_iter.next().is_none()); + /// assert!(doc.get_array("bool").is_err()); + /// assert!(doc.get_array("unknown")?.is_none()); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_array<'a>(&'a self, key: &str) -> OptResult<&'a Array> { - self.get_with(key, elem::Element::as_array) + pub fn get_array<'a>(&'a self, key: &str) -> RawResult> { + self.get_with(key, elem::RawBson::as_array) } - /// Get an element from the document, and convert it to an [elem::RawBsonBinary]. + /// Gets a reference to the BSON binary value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a binary value. /// - /// The returned [RawBsonBinary](elem::RawBsonBinary) is a borrowed reference into the DocBuf. + /// ``` + /// # use bson::raw::{RawDocument, elem, RawError}; /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not binary data. Returns `Ok(None)` if the key is not found in the - /// document. + /// use bson::{ + /// spec::BinarySubtype + /// doc, Binary, + /// }; /// - /// ``` - /// # use bson::raw::{DocBuf, elem, RawError}; - /// use bson::{doc, Binary, spec::BinarySubtype}; - /// let docbuf = DocBuf::from_document(&doc! { + /// let doc = RawDocument::from_document(&doc! 
{ /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, /// "bool": true, /// }); - /// assert_eq!(docbuf.get_binary("binary")?.map(elem::RawBsonBinary::as_bytes), Some(&[1, 2, 3][..])); - /// assert_eq!(docbuf.get_binary("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(docbuf.get_binary("unknown")?.is_none()); + /// + /// assert_eq!(doc.get_binary("binary")?.map(elem::RawBsonBinary::as_bytes), Some(&[1, 2, 3][..])); + /// assert_eq!(doc.get_binary("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_binary("unknown")?.is_none()); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_binary<'a>(&'a self, key: &str) -> OptResult> { - self.get_with(key, elem::Element::as_binary) + pub fn get_binary<'a>(&'a self, key: &str) -> RawResult>> { + self.get_with(key, elem::RawBson::as_binary) } - /// Get an element from the document, and convert it to a [bson::oid::ObjectId]. - /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not an object ID. Returns `Ok(None)` if the key is not found in the - /// document. + /// Gets a reference to the ObjectId value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't an ObjectId. /// /// ``` - /// # use bson::raw::{DocBuf, RawError}; + /// # use bson::raw::{RawDocument, RawError}; /// use bson::{doc, oid::ObjectId}; - /// let docbuf = DocBuf::from_document(&doc! { + /// + /// let doc = RawDocument::from_document(&doc! 
{ /// "_id": ObjectId::new(), /// "bool": true, /// }); - /// let _: ObjectId = docbuf.get_object_id("_id")?.unwrap(); - /// assert_eq!(docbuf.get_object_id("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(docbuf.get_object_id("unknown")?.is_none()); + /// + /// let oid = doc.get_object_id("_id")?.unwrap(); + /// assert_eq!(doc.get_object_id("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_object_id("unknown")?.is_none()); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_object_id(&self, key: &str) -> OptResult { - self.get_with(key, elem::Element::as_object_id) + pub fn get_object_id(&self, key: &str) -> RawResult> { + self.get_with(key, elem::RawBson::as_object_id) } - /// Get an element from the document, and convert it to a [bool]. - /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not a boolean. Returns `Ok(None)` if the key is not found in the - /// document. + /// Gets a reference to the boolean value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a boolean. /// /// ``` - /// # use bson::raw::{DocBuf, RawError}; + /// # use bson::raw::{RawDocument, RawError}; /// use bson::{doc, oid::ObjectId}; - /// let docbuf = DocBuf::from_document(&doc! { + /// + /// let doc = RawDocument::from_document(&doc! 
{ /// "_id": ObjectId::new(), /// "bool": true, /// }); - /// assert!(docbuf.get_bool("bool")?.unwrap()); - /// assert_eq!(docbuf.get_bool("_id").unwrap_err(), RawError::UnexpectedType); - /// assert!(docbuf.get_object_id("unknown")?.is_none()); + /// + /// assert!(doc.get_bool("bool")?.unwrap()); + /// assert_eq!(doc.get_bool("_id").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_object_id("unknown")?.is_none()); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_bool(&self, key: &str) -> OptResult { - self.get_with(key, elem::Element::as_bool) + pub fn get_bool(&self, key: &str) -> RawResult> { + self.get_with(key, elem::RawBson::as_bool) } - /// Get an element from the document, and convert it to a [chrono::DateTime]. - /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not a boolean. Returns `Ok(None)` if the key is not found in the - /// document. + /// Gets a reference to the BSON DateTime value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a DateTime. /// /// ``` - /// # use bson::raw::{DocBuf, RawError}; + /// # use bson::raw::{RawDocument, RawError}; /// use bson::doc; /// use chrono::{Utc, Datelike, TimeZone}; - /// let docbuf = DocBuf::from_document(&doc! { - /// "created_at": Utc.ymd(2020, 3, 15).and_hms(17, 0, 0), - /// "bool": true, - /// }); - /// assert_eq!(docbuf.get_datetime("created_at")?.unwrap().year(), 2020); - /// assert_eq!(docbuf.get_datetime("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(docbuf.get_datetime("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_datetime(&self, key: &str) -> OptResult> { - self.get_with(key, elem::Element::as_datetime) - } - - /// Get an element from the document, and convert it to the `()` type. - /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not null. Returns `Ok(None)` if the key is not found in the - /// document. 
/// - /// There is not much reason to use the () value, so this method mostly - /// exists for consistency with other element types, and as a way to assert - /// type of the element. - /// ``` - /// # use bson::raw::{DocBuf, RawError}; - /// use bson::doc; - /// let docbuf = DocBuf::from_document(&doc! { - /// "null": null, + /// let doc = RawDocument::from_document(&doc! { + /// "created_at": Utc.ymd(2020, 3, 15).and_hms(17, 0, 0), /// "bool": true, /// }); - /// docbuf.get_null("null")?.unwrap(); - /// assert_eq!(docbuf.get_null("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(docbuf.get_null("unknown")?.is_none()); + /// assert_eq!(doc.get_datetime("created_at")?.unwrap().year(), 2020); + /// assert_eq!(doc.get_datetime("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_datetime("unknown")?.is_none()); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_null(&self, key: &str) -> OptResult<()> { - self.get_with(key, elem::Element::as_null) + pub fn get_datetime(&self, key: &str) -> RawResult>> { + self.get_with(key, elem::RawBson::as_datetime) } - - /// Get an element from the document, and convert it to an [elem::RawBsonRegex]. - /// - /// The [RawBsonRegex](elem::RawBsonRegex) borrows data from the DocBuf. + /// Gets a reference to the BSON regex value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a regex. /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not a regex. Returns `Ok(None)` if the key is not found in the - /// document. /// ``` - /// # use bson::raw::{DocBuf, RawError, elem}; + /// # use bson::raw::{RawDocument, RawError, elem}; /// use bson::{doc, Regex}; - /// let docbuf = DocBuf::from_document(&doc! { + /// + /// let doc = RawDocument::from_document(&doc! 
{ /// "regex": Regex { - /// pattern: String::from(r"end\s*$"), - /// options: String::from("i"), + /// pattern: r"end\s*$".into(), + /// options: "i".into(), /// }, /// "bool": true, /// }); - /// assert_eq!(docbuf.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); - /// assert_eq!(docbuf.get_regex("regex")?.unwrap().options(), "i"); - /// assert_eq!(docbuf.get_regex("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(docbuf.get_regex("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_regex<'a>(&'a self, key: &str) -> OptResult> { - self.get_with(key, elem::Element::as_regex) - } - - /// Get an element from the document, and convert it to an &str representing the - /// javascript element type. /// - /// The &str borrows data from the DocBuf. If you need an owned copy of the data, - /// you should call .to_owned() on the result. - /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not a javascript code object. Returns `Ok(None)` if the key is not found - /// in the document. - /// ``` - /// # use bson::raw::{DocBuf, RawError, elem}; - /// use bson::{doc, Bson}; - /// let docbuf = DocBuf::from_document(&doc! 
{ - /// "js": Bson::JavaScriptCode(String::from("console.log(\"hi y'all\");")), - /// "bool": true, - /// }); - /// assert_eq!(docbuf.get_javascript("js")?, Some("console.log(\"hi y'all\");")); - /// assert_eq!(docbuf.get_javascript("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(docbuf.get_javascript("unknown")?.is_none()); + /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); + /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); + /// assert_eq!(doc.get_regex("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_regex("unknown")?.is_none()); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_javascript<'a>(&'a self, key: &str) -> OptResult<&'a str> { - self.get_with(key, elem::Element::as_javascript) + pub fn get_regex<'a>(&'a self, key: &str) -> RawResult>> { + self.get_with(key, elem::RawBson::as_regex) } - /// Get an element from the document, and convert it to an &str representing the - /// symbol element type. + /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an + /// error if the key corresponds to a value which isn't a timestamp. /// - /// The &str borrows data from the DocBuf. If you need an owned copy of the data, - /// you should call .to_owned() on the result. - /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not a symbol object. Returns `Ok(None)` if the key is not found - /// in the document. /// ``` - /// # use bson::raw::{DocBuf, RawError, elem}; - /// use bson::{doc, Bson}; - /// let docbuf = DocBuf::from_document(&doc! { - /// "symbol": Bson::Symbol(String::from("internal")), + /// # use bson::raw::{RawDocument, elem, RawError}; + /// use bson::{doc, Timestamp}; + /// + /// let doc = RawDocument::from_document(&doc! 
{ /// "bool": true, + /// "ts": Timestamp { time: 649876543, increment: 9 }, /// }); - /// assert_eq!(docbuf.get_symbol("symbol")?, Some("internal")); - /// assert_eq!(docbuf.get_symbol("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(docbuf.get_symbol("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_symbol<'a>(&'a self, key: &str) -> OptResult<&'a str> { - self.get_with(key, elem::Element::as_symbol) - } - - /// Get an element from the document, and extract the data as a javascript code with scope. /// - /// The return value is a `(&str, &Doc)` where the &str represents the javascript code, - /// and the [`&Doc`](Doc) represents the scope. Both elements borrow data from the DocBuf. - /// If you need an owned copy of the data, you should call [js.to_owned()](ToOwned::to_owned) on - /// the code or [scope.to_docbuf()](Doc::to_docbuf) on the scope. + /// let timestamp = doc.get_timestamp("ts")?.unwrap(); /// - /// Returns an error if the document is malformed or if the retrieved value - /// is not a javascript code with scope object. Returns `Ok(None)` if the key is not found - /// in the document. - /// ``` - /// # use bson::raw::{DocBuf, RawError, elem}; - /// use bson::{doc, JavaScriptCodeWithScope}; - /// let docbuf = DocBuf::from_document(&doc! 
{ - /// "js": JavaScriptCodeWithScope { - /// code: String::from("console.log(\"i:\", i);"), - /// scope: doc!{"i": 42}, - /// }, - /// "bool": true, - /// }); - /// let (js, scope) = docbuf.get_javascript_with_scope("js")?.unwrap(); - /// assert_eq!(js, "console.log(\"i:\", i);"); - /// assert_eq!(scope.get_i32("i")?.unwrap(), 42); - /// assert_eq!(docbuf.get_javascript_with_scope("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(docbuf.get_javascript_with_scope("unknown")?.is_none()); + /// assert_eq!(timestamp.time(), 649876543); + /// assert_eq!(timestamp.increment(), 9); + /// assert_eq!(doc.get_timestamp("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_javascript_with_scope<'a>(&'a self, key: &str) -> OptResult<(&'a str, &'a Doc)> { - self.get_with(key, elem::Element::as_javascript_with_scope) + pub fn get_timestamp<'a>(&'a self, key: &str) -> RawResult>> { + self.get_with(key, elem::RawBson::as_timestamp) } - /// Get an element from the document, and convert it to i32. - /// - /// Returns an error if the document is malformed, or if the retrieved value - /// is not an i32. Returns `Ok(None)` if the key is not found in the document. + /// Gets a reference to the BSON int32 value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a 32-bit integer. /// /// ``` - /// # use bson::raw::{DocBuf, RawError}; + /// # use bson::raw::{RawDocument, RawError}; /// use bson::doc; - /// let docbuf = DocBuf::from_document(&doc! 
{ - /// "bool": true, - /// "i32": 1_000_000, - /// }); - /// assert_eq!(docbuf.get_i32("i32"), Ok(Some(1_000_000))); - /// assert_eq!(docbuf.get_i32("bool"), Err(RawError::UnexpectedType)); - /// assert_eq!(docbuf.get_i32("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_i32(&self, key: &str) -> OptResult { - self.get_with(key, elem::Element::as_i32) - } - - /// Get an element from the document, and convert it to a timestamp. /// - /// Returns an error if the document is malformed, or if the retrieved value - /// is not an i32. Returns `Ok(None)` if the key is not found in the document. - /// - /// ``` - /// # use bson::raw::{DocBuf, elem, RawError}; - /// use bson::{doc, Timestamp}; - /// let docbuf = DocBuf::from_document(&doc! { + /// let doc = RawDocument::from_document(&doc! { /// "bool": true, - /// "ts": Timestamp { time: 649876543, increment: 9 }, + /// "i32": 1_000_000, /// }); - /// let timestamp = docbuf.get_timestamp("ts")?.unwrap(); /// - /// assert_eq!(timestamp.time(), 649876543); - /// assert_eq!(timestamp.increment(), 9); - /// assert_eq!(docbuf.get_timestamp("bool"), Err(RawError::UnexpectedType)); - /// assert_eq!(docbuf.get_timestamp("unknown"), Ok(None)); + /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); + /// assert_eq!(doc.get_i32("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_i32("unknown"), Ok(None)); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_timestamp<'a>(&'a self, key: &str) -> OptResult> { - self.get_with(key, elem::Element::as_timestamp) + pub fn get_i32(&self, key: &str) -> RawResult> { + self.get_with(key, elem::RawBson::as_i32) } - /// Get an element from the document, and convert it to i64. - /// - /// Returns an error if the document is malformed, or if the retrieved value - /// is not an i64. Returns `Ok(None)` if the key is not found in the document. 
+ /// Gets a reference to the BSON int64 value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a 64-bit integer. /// /// ``` - /// # use bson::raw::{DocBuf, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, RawError}; /// use bson::doc; - /// let docbuf = DocBuf::from_document(&doc! { + /// + /// let doc = RawDocument::from_document(&doc! { /// "bool": true, /// "i64": 9223372036854775807_i64, /// }); - /// assert_eq!(docbuf.get_i64("i64"), Ok(Some(9223372036854775807))); - /// assert_eq!(docbuf.get_i64("bool"), Err(RawError::UnexpectedType)); - /// assert_eq!(docbuf.get_i64("unknown"), Ok(None)); + /// + /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); + /// assert_eq!(doc.get_i64("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_i64("unknown"), Ok(None)); /// # Ok::<(), RawError>(()) /// ``` - pub fn get_i64(&self, key: &str) -> OptResult { - self.get_with(key, elem::Element::as_i64) + pub fn get_i64(&self, key: &str) -> RawResult> { + self.get_with(key, elem::RawBson::as_i64) } /// Return a reference to the contained data as a `&[u8]` /// /// ``` - /// # use bson::raw::DocBuf; + /// # use bson::raw::RawDocument; /// use bson::doc; - /// let docbuf = DocBuf::from_document(&doc!{}); + /// let docbuf = RawDocument::from_document(&doc!{}); /// assert_eq!(docbuf.as_bytes(), b"\x05\x00\x00\x00\x00"); /// ``` pub fn as_bytes(&self) -> &[u8] { @@ -938,25 +752,24 @@ impl Doc { } } -impl AsRef for Doc { - fn as_ref(&self) -> &Doc { +impl AsRef for RawDocumentRef { + fn as_ref(&self) -> &RawDocumentRef { self } } -impl Deref for DocBuf { - type Target = Doc; +impl Deref for RawDocument { + type Target = RawDocumentRef; fn deref(&self) -> &Self::Target { - // SAFETY: The validity of the data is checked when creating DocBuf. 
- unsafe { Doc::new_unchecked(&self.data) } + RawDocumentRef::new_unchecked(&self.data) } } -impl TryFrom<&Doc> for crate::Document { +impl TryFrom<&RawDocumentRef> for crate::Document { type Error = RawError; - fn try_from(rawdoc: &Doc) -> RawResult { + fn try_from(rawdoc: &RawDocumentRef) -> RawResult { rawdoc .into_iter() .map(|res| res.and_then(|(k, v)| Ok((k.to_owned(), v.try_into()?)))) @@ -964,27 +777,27 @@ impl TryFrom<&Doc> for crate::Document { } } -impl<'a> IntoIterator for &'a Doc { - type IntoIter = DocIter<'a>; - type Item = RawResult<(&'a str, elem::Element<'a>)>; +impl<'a> IntoIterator for &'a RawDocumentRef { + type IntoIter = RawDocumentIter<'a>; + type Item = RawResult<(&'a str, RawBson<'a>)>; - fn into_iter(self) -> DocIter<'a> { - DocIter { + fn into_iter(self) -> RawDocumentIter<'a> { + RawDocumentIter { doc: self, offset: 4, } } } -pub struct DocIter<'a> { - doc: &'a Doc, +pub struct RawDocumentIter<'a> { + doc: &'a RawDocumentRef, offset: usize, } -impl<'a> Iterator for DocIter<'a> { - type Item = RawResult<(&'a str, elem::Element<'a>)>; +impl<'a> Iterator for RawDocumentIter<'a> { + type Item = RawResult<(&'a str, elem::RawBson<'a>)>; - fn next(&mut self) -> Option)>> { + fn next(&mut self) -> Option)>> { if self.offset == self.doc.data.len() - 1 { if self.doc.data[self.offset] == 0 { // end of document marker @@ -995,11 +808,14 @@ impl<'a> Iterator for DocIter<'a> { ))); } } + let key = match read_nullterminated(&self.doc.data[self.offset + 1..]) { Ok(key) => key, Err(err) => return Some(Err(err)), }; + let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 + let element_type = match ElementType::from(self.doc.data[self.offset]) { Some(et) => et, None => { @@ -1009,34 +825,41 @@ impl<'a> Iterator for DocIter<'a> { )))) } }; + let element_size = match element_type { ElementType::Double => 8, ElementType::String => { let size = 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if 
self.doc.data[valueoffset + size - 1] != 0 { return Some(Err(RawError::MalformedValue( "string not null terminated".into(), ))); } + size } ElementType::EmbeddedDocument => { let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if self.doc.data[valueoffset + size - 1] != 0 { return Some(Err(RawError::MalformedValue( "document not null terminated".into(), ))); } + size } ElementType::Array => { let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if self.doc.data[valueoffset + size - 1] != 0 { return Some(Err(RawError::MalformedValue( "array not null terminated".into(), ))); } + size } ElementType::Binary => { @@ -1052,32 +875,39 @@ impl<'a> Iterator for DocIter<'a> { Ok(regex) => regex, Err(err) => return Some(Err(err)), }; + let options = match read_nullterminated(&self.doc.data[valueoffset + regex.len() + 1..]) { Ok(options) => options, Err(err) => return Some(Err(err)), }; + regex.len() + options.len() + 2 } ElementType::DbPointer => { let string_size = 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + let id_size = 12; + if self.doc.data[valueoffset + string_size - 1] != 0 { return Some(Err(RawError::MalformedValue( "DBPointer string not null-terminated".into(), ))); } + string_size + id_size } ElementType::JavaScriptCode => { let size = 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if self.doc.data[valueoffset + size - 1] != 0 { return Some(Err(RawError::MalformedValue( "javascript code not null-terminated".into(), ))); } + size } ElementType::Symbol => { @@ -1085,11 +915,13 @@ impl<'a> Iterator for DocIter<'a> { } ElementType::JavaScriptCodeWithScope => { let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if self.doc.data[valueoffset + size - 1] != 0 { return Some(Err(RawError::MalformedValue( "javascript with scope not null-terminated".into(), ))); } + size } ElementType::Int32 => 4, @@ -1099,171 +931,135 @@ 
impl<'a> Iterator for DocIter<'a> { ElementType::MaxKey => 0, ElementType::MinKey => 0, }; + let nextoffset = valueoffset + element_size; self.offset = nextoffset; + Some(Ok(( key, - elem::Element::new(element_type, &self.doc.data[valueoffset..nextoffset]), + elem::RawBson::new(element_type, &self.doc.data[valueoffset..nextoffset]), ))) } } -pub type ArrayRef<'a> = &'a Array; - -pub struct Array { - doc: Doc, +/// A BSON array referencing raw bytes stored elsewhere. +pub struct RawArray { + doc: RawDocumentRef, } -impl Array { - pub fn new(data: &[u8]) -> RawResult<&Array> { - Ok(Array::from_doc(Doc::new(data)?)) +impl RawArray { + fn new(data: &[u8]) -> RawResult<&RawArray> { + Ok(RawArray::from_doc(RawDocumentRef::new(data)?)) } - /// Return a new Array from the provided bytes. - /// - /// # Safety - /// - /// The provided bytes must start with a valid length indicator - /// and end with a NUL terminator, as described in [the bson - /// spec](http://bsonspec.org/spec.html). - /// - /// The following is valid: - /// ``` - /// # use bson::raw::Array; - /// // Represents the array [null, 514i32], which is the same as the document - /// // {"0": null, "1": 514} - /// let bson = b"\x0f\0\0\0\x0A0\0\x101\0\x02\x02\0\0\0"; - /// let arr = unsafe { Array::new_unchecked(bson) }; - /// let mut arriter = arr.into_iter(); - /// assert!(arriter.next().unwrap().and_then(|b| b.as_null()).is_ok()); - /// assert_eq!(arriter.next().unwrap().and_then(|b| b.as_i32()).unwrap(), 514); - /// ``` - /// - /// And so is this, even though the provided document is not an array, because - /// the errors will be caught during decode. 
- /// - /// ``` - /// # use bson::raw::Array; - /// // Represents the document {"0": null, "X": 514} - /// let bson = b"\x0f\0\0\0\x0A0\0\x10X\0\x02\x02\0\0\0"; - /// let arr = unsafe { Array::new_unchecked(bson) }; - /// let mut arriter = arr.into_iter(); - /// assert!(arriter.next().unwrap().and_then(|b| b.as_null()).is_ok()); - /// assert!(arriter.next().unwrap().is_err()); - /// assert!(arriter.next().is_none()); - /// ``` - /// - /// # Bad: - /// - /// The following, however, indicates the wrong size for the document, and is - /// therefore unsound. - /// - /// ``` - /// # use bson::raw::Array; - /// // Contains a length indicator, that is longer than the array - /// let invalid = b"\x06\0\0\0\0"; - /// let arr: &Array = unsafe { Array::new_unchecked(invalid) }; - /// ``` - pub unsafe fn new_unchecked(data: &[u8]) -> &Array { - #[allow(unused_unsafe)] - let doc = unsafe { Doc::new_unchecked(data) }; - Array::from_doc(doc) + fn from_doc(doc: &RawDocumentRef) -> &RawArray { + // SAFETY: + // + // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is + // null, dangling, or misaligned. We know the pointer is not null or dangling due to the + // fact that it's created by a safe reference. Converting &RawDocumentRef to *const + // RawDocumentRef will be properly aligned due to them being references to the same type, + // and converting *const RawDocumentRef to *const RawArray is aligned due to the fact that + // the only field in a RawArray is a RawDocumentRef, meaning the structs are represented + // identically at the byte level. + unsafe { &*(doc as *const RawDocumentRef as *const RawArray) } } - pub fn from_doc(doc: &Doc) -> &Array { - // SAFETY: Array layout matches Doc layout - unsafe { &*(doc as *const Doc as *const Array) } - } - - pub fn get(&self, index: usize) -> OptResult> { + /// Gets a reference to the value at the given index. 
+ pub fn get(&self, index: usize) -> RawResult>> { self.into_iter().nth(index).transpose() } fn get_with<'a, T>( &'a self, index: usize, - f: impl FnOnce(elem::Element<'a>) -> RawResult, - ) -> OptResult { + f: impl FnOnce(elem::RawBson<'a>) -> RawResult, + ) -> RawResult> { self.get(index)?.map(f).transpose() } - pub fn get_f64(&self, index: usize) -> OptResult { - self.get_with(index, elem::Element::as_f64) - } - - pub fn get_str(&self, index: usize) -> OptResult<&str> { - self.get_with(index, elem::Element::as_str) - } - - pub fn get_document(&self, index: usize) -> OptResult<&Doc> { - self.get_with(index, elem::Element::as_document) - } - - pub fn get_array(&self, index: usize) -> OptResult<&Array> { - self.get_with(index, elem::Element::as_array) - } - - pub fn get_binary(&self, index: usize) -> OptResult> { - self.get_with(index, elem::Element::as_binary) + /// Gets the BSON double at the given index or returns an error if the value at that index isn't + /// a double. + pub fn get_f64(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_f64) } - pub fn get_object_id(&self, index: usize) -> OptResult { - self.get_with(index, elem::Element::as_object_id) + /// Gets a reference to the string at the given index or returns an error if the + /// value at that index isn't a string. + pub fn get_str(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_str) } - pub fn get_bool(&self, index: usize) -> OptResult { - self.get_with(index, elem::Element::as_bool) + /// Gets a reference to the document at the given index or returns an error if the + /// value at that index isn't a document. 
+ pub fn get_document(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_document) } - pub fn get_datetime(&self, index: usize) -> OptResult> { - self.get_with(index, elem::Element::as_datetime) + /// Gets a reference to the array at the given index or returns an error if the + /// value at that index isn't a array. + pub fn get_array(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_array) } - pub fn get_null(&self, index: usize) -> OptResult<()> { - self.get_with(index, elem::Element::as_null) + /// Gets a reference to the BSON binary value at the given index or returns an error if the + /// value at that index isn't a binary. + pub fn get_binary(&self, index: usize) -> RawResult>> { + self.get_with(index, elem::RawBson::as_binary) } - pub fn get_regex(&self, index: usize) -> OptResult> { - self.get_with(index, elem::Element::as_regex) + /// Gets the ObjectId at the given index or returns an error if the value at that index isn't an + /// ObjectId. + pub fn get_object_id(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_object_id) } - pub fn get_javascript(&self, index: usize) -> OptResult<&str> { - self.get_with(index, elem::Element::as_javascript) + /// Gets the boolean at the given index or returns an error if the value at that index isn't a + /// boolean. + pub fn get_bool(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_bool) } - pub fn get_symbol(&self, index: usize) -> OptResult<&str> { - self.get_with(index, elem::Element::as_symbol) + /// Gets the DateTime at the given index or returns an error if the value at that index isn't a + /// DateTime. 
+ pub fn get_datetime(&self, index: usize) -> RawResult>> { + self.get_with(index, elem::RawBson::as_datetime) } - pub fn get_javascript_with_scope(&self, index: usize) -> OptResult<(&str, &Doc)> { - self.get_with(index, elem::Element::as_javascript_with_scope) + /// Gets a reference to the BSON regex at the given index or returns an error if the + /// value at that index isn't a regex. + pub fn get_regex(&self, index: usize) -> RawResult>> { + self.get_with(index, elem::RawBson::as_regex) } - pub fn get_i32(&self, index: usize) -> OptResult { - self.get_with(index, elem::Element::as_i32) + /// Gets a reference to the BSON timestamp at the given index or returns an error if the + /// value at that index isn't a timestamp. + pub fn get_timestamp(&self, index: usize) -> RawResult>> { + self.get_with(index, elem::RawBson::as_timestamp) } - pub fn get_timestamp(&self, index: usize) -> OptResult> { - self.get_with(index, elem::Element::as_timestamp) + /// Gets the BSON int32 at the given index or returns an error if the value at that index isn't + /// a 32-bit integer. + pub fn get_i32(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_i32) } - pub fn get_i64(&self, index: usize) -> OptResult { - self.get_with(index, elem::Element::as_i64) - } - - pub fn to_vec(&self) -> RawResult>> { - self.into_iter().collect() + /// Gets BSON int64 at the given index or returns an error if the value at that index isn't a + /// 64-bit integer. + pub fn get_i64(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_i64) } + /// Gets a reference to the raw bytes of the RawArray. 
pub fn as_bytes(&self) -> &[u8] { self.doc.as_bytes() } } -impl TryFrom<&Array> for Vec { +impl TryFrom<&RawArray> for Vec { type Error = RawError; - fn try_from(arr: &Array) -> RawResult> { + fn try_from(arr: &RawArray) -> RawResult> { arr.into_iter() .map(|result| { let rawbson = result?; @@ -1273,45 +1069,30 @@ impl TryFrom<&Array> for Vec { } } -impl<'a> IntoIterator for &'a Array { - type IntoIter = ArrayIter<'a>; - type Item = RawResult>; +impl<'a> IntoIterator for &'a RawArray { + type IntoIter = RawArrayIter<'a>; + type Item = RawResult>; - fn into_iter(self) -> ArrayIter<'a> { - ArrayIter { - dociter: self.doc.into_iter(), - index: 0, + fn into_iter(self) -> RawArrayIter<'a> { + RawArrayIter { + inner: self.doc.into_iter(), } } } -pub struct ArrayIter<'a> { - dociter: DocIter<'a>, - index: usize, +pub struct RawArrayIter<'a> { + inner: RawDocumentIter<'a>, } -impl<'a> Iterator for ArrayIter<'a> { - type Item = RawResult>; - - fn next(&mut self) -> Option>> { - let value = self.dociter.next().map(|result| { - let (key, bson) = match result { - Ok(value) => value, - Err(err) => return Err(err), - }; - - let index: usize = key - .parse() - .map_err(|_| RawError::MalformedValue("non-integer array index found".into()))?; +impl<'a> Iterator for RawArrayIter<'a> { + type Item = RawResult>; - if index == self.index { - Ok(bson) - } else { - Err(RawError::MalformedValue("wrong array index found".into())) - } - }); - self.index += 1; - value + fn next(&mut self) -> Option>> { + match self.inner.next() { + Some(Ok((_, v))) => Some(Ok(v)), + Some(Err(e)) => Some(Err(e)), + None => None, + } } } /// Given a 4 byte u8 slice, return an i32 calculated from the bytes in @@ -1377,13 +1158,17 @@ fn try_to_str(data: &[u8]) -> RawResult<&str> { } } -pub type DocRef<'a> = &'a Doc; - #[cfg(test)] mod tests { use super::*; use crate::{ - doc, spec::BinarySubtype, Binary, Bson, JavaScriptCodeWithScope, Regex, Timestamp, + doc, + spec::BinarySubtype, + Binary, + Bson, + 
JavaScriptCodeWithScope, + Regex, + Timestamp, }; use chrono::TimeZone; @@ -1400,7 +1185,7 @@ mod tests { "that": "second", "something": "else", }); - let rawdoc = Doc::new(&docbytes).unwrap(); + let rawdoc = RawDocumentRef::new(&docbytes).unwrap(); assert_eq!( rawdoc.get("that").unwrap().unwrap().as_str().unwrap(), "second", @@ -1414,7 +1199,7 @@ mod tests { "inner": "surprise", }, }); - let rawdoc = Doc::new(&docbytes).unwrap(); + let rawdoc = RawDocumentRef::new(&docbytes).unwrap(); assert_eq!( rawdoc .get("outer") @@ -1438,7 +1223,7 @@ mod tests { "peanut butter": "chocolate", "easy as": {"do": 1, "re": 2, "mi": 3}, }); - let rawdoc = Doc::new(&docbytes).expect("malformed bson document"); + let rawdoc = RawDocumentRef::new(&docbytes).expect("malformed bson document"); let mut dociter = rawdoc.into_iter(); let next = dociter.next().expect("no result").expect("invalid bson"); assert_eq!(next.0, "apples"); @@ -1461,7 +1246,7 @@ mod tests { "document": {}, "array": ["binary", "serialized", "object", "notation"], "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, - "object_id": oid::ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), + "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), "boolean": true, "datetime": Utc::now(), "null": Bson::Null, @@ -1475,7 +1260,7 @@ mod tests { "end": "END", }); - let rawdoc = Doc::new(&docbytes).expect("invalid document"); + let rawdoc = RawDocumentRef::new(&docbytes).expect("invalid document"); let _doc: crate::Document = rawdoc.try_into().expect("invalid bson"); } @@ -1483,7 +1268,7 @@ mod tests { fn f64() { #![allow(clippy::float_cmp)] - let rawdoc = DocBuf::from_document(&doc! {"f64": 2.5}); + let rawdoc = RawDocument::from_document(&doc! {"f64": 2.5}); assert_eq!( rawdoc .get("f64") @@ -1497,7 +1282,7 @@ mod tests { #[test] fn string() { - let rawdoc = DocBuf::from_document(&doc! {"string": "hello"}); + let rawdoc = RawDocument::from_document(&doc! 
{"string": "hello"}); assert_eq!( rawdoc @@ -1511,7 +1296,7 @@ mod tests { } #[test] fn document() { - let rawdoc = DocBuf::from_document(&doc! {"document": {}}); + let rawdoc = RawDocument::from_document(&doc! {"document": {}}); let doc = rawdoc .get("document") @@ -1524,8 +1309,9 @@ mod tests { #[test] fn array() { - let rawdoc = - DocBuf::from_document(&doc! { "array": ["binary", "serialized", "object", "notation"]}); + let rawdoc = RawDocument::from_document( + &doc! { "array": ["binary", "serialized", "object", "notation"]}, + ); let array = rawdoc .get("array") @@ -1540,10 +1326,10 @@ mod tests { #[test] fn binary() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! { "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] } }); - let binary: elem::RawBsonBinary<'_> = rawdoc + let binary: elem::RawBinary<'_> = rawdoc .get("binary") .expect("error finding key binary") .expect("no key binary") @@ -1555,8 +1341,8 @@ mod tests { #[test] fn object_id() { - let rawdoc = DocBuf::from_document(&doc! { - "object_id": oid::ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + let rawdoc = RawDocument::from_document(&doc! { + "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), }); let oid = rawdoc .get("object_id") @@ -1569,7 +1355,7 @@ mod tests { #[test] fn boolean() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! { "boolean": true, }); @@ -1585,7 +1371,7 @@ mod tests { #[test] fn datetime() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! { "boolean": true, "datetime": Utc.ymd(2000,10,31).and_hms(12, 30, 45), }); @@ -1600,7 +1386,7 @@ mod tests { #[test] fn null() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! 
{ "null": null, }); let () = rawdoc @@ -1613,7 +1399,7 @@ mod tests { #[test] fn regex() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! { "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), }); let regex = rawdoc @@ -1627,7 +1413,7 @@ mod tests { } #[test] fn javascript() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! { "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), }); let js = rawdoc @@ -1641,7 +1427,7 @@ mod tests { #[test] fn symbol() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! { "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), }); @@ -1656,7 +1442,7 @@ mod tests { #[test] fn javascript_with_scope() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! { "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), }); let (js, scopedoc) = rawdoc @@ -1678,7 +1464,7 @@ mod tests { #[test] fn int32() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! { "int32": 23i32, }); let int32 = rawdoc @@ -1692,7 +1478,7 @@ mod tests { #[test] fn timestamp() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! { "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 7 }), }); let ts = rawdoc @@ -1708,7 +1494,7 @@ mod tests { #[test] fn int64() { - let rawdoc = DocBuf::from_document(&doc! { + let rawdoc = RawDocument::from_document(&doc! 
{ "int64": 46i64, }); let int64 = rawdoc @@ -1727,7 +1513,7 @@ mod tests { "document": {}, "array": ["binary", "serialized", "object", "notation"], "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, - "object_id": oid::ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), + "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), "boolean": true, "datetime": Utc::now(), "null": Bson::Null, @@ -1740,7 +1526,7 @@ mod tests { "int64": 46i64, "end": "END", }); - let rawdoc = unsafe { Doc::new_unchecked(&docbytes) }; + let rawdoc = unsafe { RawDocumentRef::new_unchecked(&docbytes) }; assert_eq!( rawdoc @@ -1766,11 +1552,11 @@ mod tests { "string": "hello", "document": {}, "array": ["binary", "serialized", "object", "notation"], - "object_id": oid::ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, "boolean": false, }); - let rawbson = elem::Element::new(ElementType::EmbeddedDocument, &docbytes); + let rawbson = elem::RawBson::new(ElementType::EmbeddedDocument, &docbytes); let b: Bson = rawbson.try_into().expect("invalid bson"); let doc = b.as_document().expect("not a document"); assert_eq!(*doc.get("f64").expect("f64 not found"), Bson::Double(2.5)); @@ -1793,7 +1579,7 @@ mod tests { ); assert_eq!( *doc.get("object_id").expect("object_id not found"), - Bson::ObjectId(oid::ObjectId::with_bytes([ + Bson::ObjectId(ObjectId::with_bytes([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ])) ); @@ -1816,8 +1602,7 @@ mod proptests { use proptest::prelude::*; use std::convert::TryInto; - use super::props::arbitrary_bson; - use super::DocBuf; + use super::{props::arbitrary_bson, RawDocument}; use crate::doc; fn to_bytes(doc: &crate::Document) -> Vec { @@ -1829,7 +1614,7 @@ mod proptests { proptest! 
{ #[test] fn no_crashes(s: Vec) { - let _ = DocBuf::new(s); + let _ = RawDocument::new(s); } #[test] @@ -1837,7 +1622,7 @@ mod proptests { println!("{:?}", bson); let doc = doc!{"bson": bson}; let raw = to_bytes(&doc); - let raw = DocBuf::new(raw); + let raw = RawDocument::new(raw); prop_assert!(raw.is_ok()); let raw = raw.unwrap(); let roundtrip: Result = raw.try_into(); From 4dda9b730499eafebd7747eb27a254779ea1f9dd Mon Sep 17 00:00:00 2001 From: Saghm Rossi Date: Thu, 11 Feb 2021 17:46:13 -0500 Subject: [PATCH 03/48] add raw code with scope type --- src/raw/elem.rs | 19 +++++++++++++++++++ src/raw/mod.rs | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 64221936..993bb91d 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -407,3 +407,22 @@ impl<'a> RawTimestamp<'a> { u32_from_slice(&self.data[0..4]) } } + +/// A BSON "code with scope" value referencing raw bytes stored elsewhere. +#[derive(Clone, Copy, Debug)] +pub struct RawJavaScriptCodeWithScope<'a> { + code: &'a str, + scope: &'a RawDocumentRef, +} + +impl<'a> RawJavaScriptCodeWithScope<'a> { + /// Gets the code in the value. + pub fn code(self) -> &'a str { + self.code + } + + /// Gets the scope in the value. + pub fn scope(self) -> &'a RawDocumentRef { + self.scope + } +} diff --git a/src/raw/mod.rs b/src/raw/mod.rs index ad842c97..11bd4849 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -117,7 +117,7 @@ use chrono::{DateTime, Utc}; #[cfg(feature = "decimal128")] use crate::decimal128::Decimal128; use crate::{oid::ObjectId, spec::ElementType, Bson, Document}; -pub use elem::{RawBinary, RawBson, RawRegex, RawTimestamp}; +pub use elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex, RawTimestamp}; /// An error that occurs when attempting to parse raw BSON bytes. 
#[derive(Debug, PartialEq)] From 7770cf194dd601d40c4bef2b6c673191482b85b3 Mon Sep 17 00:00:00 2001 From: Saghm Rossi Date: Thu, 11 Feb 2021 17:46:44 -0500 Subject: [PATCH 04/48] add documentation to raw regex type --- src/raw/elem.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 993bb91d..3ec6818d 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -362,7 +362,7 @@ pub struct RawRegex<'a> { } impl<'a> RawRegex<'a> { - pub fn new(data: &'a [u8]) -> RawResult> { + pub(super) fn new(data: &'a [u8]) -> RawResult> { let pattern = read_nullterminated(data)?; let opts = read_nullterminated(&data[pattern.len() + 1..])?; if pattern.len() + opts.len() == data.len() - 2 { @@ -377,10 +377,12 @@ impl<'a> RawRegex<'a> { } } + /// Gets the pattern portion of the regex. pub fn pattern(self) -> &'a str { self.pattern } + /// Gets the options portion of the regex. pub fn options(self) -> &'a str { self.options } From 77adbdb1e6bd95c163c89b4e318a2bfb40e0c8ef Mon Sep 17 00:00:00 2001 From: Saghm Rossi Date: Thu, 11 Feb 2021 17:47:05 -0500 Subject: [PATCH 05/48] revise doc comments on raw timestamp getters --- src/raw/elem.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 3ec6818d..93b164a6 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -395,14 +395,14 @@ pub struct RawTimestamp<'a> { } impl<'a> RawTimestamp<'a> { - /// Return the time portion of the timestamp. + /// Gets the time portion of the timestamp. pub fn time(&self) -> u32 { // RawBsonTimestamp can only be constructed with the correct data length, so this should // always succeed. u32_from_slice(&self.data[4..8]) } - /// Return the increment portion of the timestamp. + /// Gets the increment portion of the timestamp. pub fn increment(&self) -> u32 { // RawBsonTimestamp can only be constructed with the correct data length, so this should // always succeed. 
From bd43739af49d3f151cc3d2dfff2e6e90a4a720ec Mon Sep 17 00:00:00 2001 From: Saghm Rossi Date: Thu, 11 Feb 2021 17:47:19 -0500 Subject: [PATCH 06/48] name field in raw malformed value error --- src/raw/elem.rs | 44 ++++++++++++----------- src/raw/mod.rs | 93 +++++++++++++++++++++++++++---------------------- 2 files changed, 75 insertions(+), 62 deletions(-) diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 93b164a6..24f8f433 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -50,9 +50,9 @@ impl<'a> RawBson<'a> { pub fn as_f64(self) -> RawResult { if let ElementType::Double = self.element_type { Ok(f64::from_bits(u64::from_le_bytes( - self.data - .try_into() - .map_err(|_| RawError::MalformedValue("f64 should be 8 bytes long".into()))?, + self.data.try_into().map_err(|_| RawError::MalformedValue { + message: "f64 should be 8 bytes long".into(), + })?, ))) } else { Err(RawError::UnexpectedType) @@ -92,22 +92,22 @@ impl<'a> RawBson<'a> { let length = i32_from_slice(&self.data[0..4]); let subtype = BinarySubtype::from(self.data[4]); // TODO: This mishandles reserved values if self.data.len() as i32 != length + 5 { - return Err(RawError::MalformedValue( - "binary bson has wrong declared length".into(), - )); + return Err(RawError::MalformedValue { + message: "binary bson has wrong declared length".into(), + }); } let data = match subtype { BinarySubtype::BinaryOld => { if length < 4 { - return Err(RawError::MalformedValue( - "old binary subtype has no inner declared length".into(), - )); + return Err(RawError::MalformedValue { + message: "old binary subtype has no inner declared length".into(), + }); } let oldlength = i32_from_slice(&self.data[5..9]); if oldlength + 4 != length { - return Err(RawError::MalformedValue( - "old binary subtype has wrong inner declared length".into(), - )); + return Err(RawError::MalformedValue { + message: "old binary subtype has wrong inner declared length".into(), + }); } &self.data[9..] 
} @@ -123,7 +123,9 @@ impl<'a> RawBson<'a> { pub fn as_object_id(self) -> RawResult { if let ElementType::ObjectId = self.element_type { Ok(ObjectId::with_bytes(self.data.try_into().map_err( - |_| RawError::MalformedValue("object id should be 12 bytes long".into()), + |_| RawError::MalformedValue { + message: "object id should be 12 bytes long".into(), + }, )?)) } else { Err(RawError::UnexpectedType) @@ -134,14 +136,16 @@ impl<'a> RawBson<'a> { pub fn as_bool(self) -> RawResult { if let ElementType::Boolean = self.element_type { if self.data.len() != 1 { - Err(RawError::MalformedValue("boolean has length != 1".into())) + Err(RawError::MalformedValue { + message: "boolean has length != 1".into(), + }) } else { match self.data[0] { 0 => Ok(false), 1 => Ok(true), - _ => Err(RawError::MalformedValue( - "boolean value was not 0 or 1".into(), - )), + _ => Err(RawError::MalformedValue { + message: "boolean value was not 0 or 1".into(), + }), } } } else { @@ -371,9 +375,9 @@ impl<'a> RawRegex<'a> { options: opts, }) } else { - Err(RawError::MalformedValue( - "expected two null-terminated strings".into(), - )) + Err(RawError::MalformedValue { + message: "expected two null-terminated strings".into(), + }) } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 11bd4849..04c07f81 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -126,9 +126,9 @@ pub enum RawError { UnexpectedType, /// A BSON value did not fit the proper format. - MalformedValue(String), + MalformedValue { message: String }, - /// Improper UTF-8 bytes were found when proper UTF-7 was expected. The error value contains + /// Improper UTF-8 bytes were found when proper UTF-8 was expected. The error value contains /// the malformed data as bytes. 
Utf8EncodingError(Vec), } @@ -137,7 +137,7 @@ impl std::fmt::Display for RawError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { Self::UnexpectedType => write!(f, "unexpected type"), - Self::MalformedValue(s) => write!(f, "malformed value: {:?}", s), + Self::MalformedValue { message } => write!(f, "malformed value: {:?}", message), Self::Utf8EncodingError(_) => write!(f, "utf-8 encoding error"), } } @@ -205,19 +205,23 @@ impl RawDocument { /// ``` pub fn new(data: Vec) -> RawResult { if data.len() < 5 { - return Err(RawError::MalformedValue("document too short".into())); + return Err(RawError::MalformedValue { + message: "document too short".into(), + }); } let length = i32_from_slice(&data[..4]); if data.len() as i32 != length { - return Err(RawError::MalformedValue("document length incorrect".into())); + return Err(RawError::MalformedValue { + message: "document length incorrect".into(), + }); } if data[data.len() - 1] != 0 { - return Err(RawError::MalformedValue( - "document not null-terminated".into(), - )); + return Err(RawError::MalformedValue { + message: "document not null-terminated".into(), + }); } Ok(Self { @@ -389,19 +393,23 @@ impl RawDocumentRef { let data = data.as_ref(); if data.len() < 5 { - return Err(RawError::MalformedValue("document too short".into())); + return Err(RawError::MalformedValue { + message: "document too short".into(), + }); } let length = i32_from_slice(&data[..4]); if data.len() as i32 != length { - return Err(RawError::MalformedValue("document length incorrect".into())); + return Err(RawError::MalformedValue { + message: "document length incorrect".into(), + }); } if data[data.len() - 1] != 0 { - return Err(RawError::MalformedValue( - "document not null-terminated".into(), - )); + return Err(RawError::MalformedValue { + message: "document not null-terminated".into(), + }); } Ok(RawDocumentRef::new_unchecked(data)) @@ -803,9 +811,9 @@ impl<'a> Iterator for RawDocumentIter<'a> { // end of document 
marker return None; } else { - return Some(Err(RawError::MalformedValue( - "document not null terminated".into(), - ))); + return Some(Err(RawError::MalformedValue { + message: "document not null terminated".into(), + })); } } @@ -819,10 +827,9 @@ impl<'a> Iterator for RawDocumentIter<'a> { let element_type = match ElementType::from(self.doc.data[self.offset]) { Some(et) => et, None => { - return Some(Err(RawError::MalformedValue(format!( - "invalid tag: {}", - self.doc.data[self.offset] - )))) + return Some(Err(RawError::MalformedValue { + message: format!("invalid tag: {}", self.doc.data[self.offset]), + })) } }; @@ -833,9 +840,9 @@ impl<'a> Iterator for RawDocumentIter<'a> { 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue( - "string not null terminated".into(), - ))); + return Some(Err(RawError::MalformedValue { + message: "string not null terminated".into(), + })); } size @@ -844,9 +851,9 @@ impl<'a> Iterator for RawDocumentIter<'a> { let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue( - "document not null terminated".into(), - ))); + return Some(Err(RawError::MalformedValue { + message: "document not null terminated".into(), + })); } size @@ -855,9 +862,9 @@ impl<'a> Iterator for RawDocumentIter<'a> { let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue( - "array not null terminated".into(), - ))); + return Some(Err(RawError::MalformedValue { + message: "array not null terminated".into(), + })); } size @@ -891,9 +898,9 @@ impl<'a> Iterator for RawDocumentIter<'a> { let id_size = 12; if self.doc.data[valueoffset + string_size - 1] != 0 { - return Some(Err(RawError::MalformedValue( - "DBPointer string not 
null-terminated".into(), - ))); + return Some(Err(RawError::MalformedValue { + message: "DBPointer string not null-terminated".into(), + })); } string_size + id_size @@ -903,9 +910,9 @@ impl<'a> Iterator for RawDocumentIter<'a> { 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue( - "javascript code not null-terminated".into(), - ))); + return Some(Err(RawError::MalformedValue { + message: "javascript code not null-terminated".into(), + })); } size @@ -917,9 +924,9 @@ impl<'a> Iterator for RawDocumentIter<'a> { let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue( - "javascript with scope not null-terminated".into(), - ))); + return Some(Err(RawError::MalformedValue { + message: "javascript with scope not null-terminated".into(), + })); } size @@ -1135,13 +1142,15 @@ fn d128_from_slice(val: &[u8]) -> Decimal128 { fn read_nullterminated(buf: &[u8]) -> RawResult<&str> { let mut splits = buf.splitn(2, |x| *x == 0); - let value = splits - .next() - .ok_or_else(|| RawError::MalformedValue("no value".into()))?; + let value = splits.next().ok_or_else(|| RawError::MalformedValue { + message: "no value".into(), + })?; if splits.next().is_some() { Ok(try_to_str(value)?) 
} else { - Err(RawError::MalformedValue("expected null terminator".into())) + Err(RawError::MalformedValue { + message: "expected null terminator".into(), + }) } } From 1c8e85634894756c4fb9e258cf1e15850279285a Mon Sep 17 00:00:00 2001 From: Saghm Rossi Date: Tue, 16 Feb 2021 14:15:13 -0500 Subject: [PATCH 07/48] use raw code with scope type in RawBson --- src/raw/elem.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 24f8f433..0a5d1d95 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -209,15 +209,15 @@ impl<'a> RawBson<'a> { /// Gets the BSON JavaScript code with scope that's referenced or returns an error if the value /// isn't BSON JavaScript code with scope. - pub fn as_javascript_with_scope(self) -> RawResult<(&'a str, &'a RawDocumentRef)> { + pub fn as_javascript_with_scope(self) -> RawResult> { if let ElementType::JavaScriptCodeWithScope = self.element_type { let length = i32_from_slice(&self.data[..4]); assert_eq!(self.data.len() as i32, length); - let js = read_lenencoded(&self.data[4..])?; - let doc = RawDocumentRef::new(&self.data[9 + js.len()..])?; + let code = read_lenencoded(&self.data[4..])?; + let scope = RawDocumentRef::new(&self.data[9 + code.len()..])?; - Ok((js, doc)) + Ok(RawJavaScriptCodeWithScope { code, scope }) } else { Err(RawError::UnexpectedType) } @@ -318,9 +318,10 @@ impl<'a> TryFrom> for Bson { ElementType::DbPointer => panic!("Uh oh. 
Maybe this should be a TryFrom"), ElementType::Symbol => Bson::Symbol(String::from(rawbson.as_symbol()?)), ElementType::JavaScriptCodeWithScope => { - let (js, scope) = rawbson.as_javascript_with_scope()?; + let RawJavaScriptCodeWithScope { code, scope } = + rawbson.as_javascript_with_scope()?; Bson::JavaScriptCodeWithScope(crate::JavaScriptCodeWithScope { - code: String::from(js), + code: String::from(code), scope: scope.try_into()?, }) } From 12ddf7f1e3b096ca90e2d74fdc030860224c6acd Mon Sep 17 00:00:00 2001 From: Saghm Rossi Date: Tue, 16 Feb 2021 14:17:44 -0500 Subject: [PATCH 08/48] document RawArrayIter --- src/raw/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 04c07f81..506e4216 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1087,6 +1087,7 @@ impl<'a> IntoIterator for &'a RawArray { } } +/// An iterator over borrwed raw BSON array values. pub struct RawArrayIter<'a> { inner: RawDocumentIter<'a>, } From 43485f7d10b152c87033e9bb1be54a7917698d23 Mon Sep 17 00:00:00 2001 From: Saghm Rossi Date: Tue, 16 Feb 2021 14:32:07 -0500 Subject: [PATCH 09/48] temp --- src/raw/array.rs | 154 +++++ src/raw/doc.rs | 814 +++++++++++++++++++++++++ src/raw/elem.rs | 52 +- src/raw/error.rs | 27 + src/raw/mod.rs | 1497 +--------------------------------------------- src/raw/test.rs | 460 ++++++++++++++ 6 files changed, 1499 insertions(+), 1505 deletions(-) create mode 100644 src/raw/array.rs create mode 100644 src/raw/doc.rs create mode 100644 src/raw/error.rs create mode 100644 src/raw/test.rs diff --git a/src/raw/array.rs b/src/raw/array.rs new file mode 100644 index 00000000..f814fac2 --- /dev/null +++ b/src/raw/array.rs @@ -0,0 +1,154 @@ +/// A BSON array referencing raw bytes stored elsewhere. 
+pub struct RawArray { + doc: RawDocumentRef, +} + +impl RawArray { + fn new(data: &[u8]) -> RawResult<&RawArray> { + Ok(RawArray::from_doc(RawDocumentRef::new(data)?)) + } + + fn from_doc(doc: &RawDocumentRef) -> &RawArray { + // SAFETY: + // + // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is + // null, dangling, or misaligned. We know the pointer is not null or dangling due to the + // fact that it's created by a safe reference. Converting &RawDocumentRef to *const + // RawDocumentRef will be properly aligned due to them being references to the same type, + // and converting *const RawDocumentRef to *const RawArray is aligned due to the fact that + // the only field in a RawArray is a RawDocumentRef, meaning the structs are represented + // identically at the byte level. + unsafe { &*(doc as *const RawDocumentRef as *const RawArray) } + } + + /// Gets a reference to the value at the given index. + pub fn get(&self, index: usize) -> RawResult>> { + self.into_iter().nth(index).transpose() + } + + fn get_with<'a, T>( + &'a self, + index: usize, + f: impl FnOnce(elem::RawBson<'a>) -> RawResult, + ) -> RawResult> { + self.get(index)?.map(f).transpose() + } + + /// Gets the BSON double at the given index or returns an error if the value at that index isn't + /// a double. + pub fn get_f64(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_f64) + } + + /// Gets a reference to the string at the given index or returns an error if the + /// value at that index isn't a string. + pub fn get_str(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_str) + } + + /// Gets a reference to the document at the given index or returns an error if the + /// value at that index isn't a document. 
+ pub fn get_document(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_document) + } + + /// Gets a reference to the array at the given index or returns an error if the + /// value at that index isn't a array. + pub fn get_array(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_array) + } + + /// Gets a reference to the BSON binary value at the given index or returns an error if the + /// value at that index isn't a binary. + pub fn get_binary(&self, index: usize) -> RawResult>> { + self.get_with(index, elem::RawBson::as_binary) + } + + /// Gets the ObjectId at the given index or returns an error if the value at that index isn't an + /// ObjectId. + pub fn get_object_id(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_object_id) + } + + /// Gets the boolean at the given index or returns an error if the value at that index isn't a + /// boolean. + pub fn get_bool(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_bool) + } + + /// Gets the DateTime at the given index or returns an error if the value at that index isn't a + /// DateTime. + pub fn get_datetime(&self, index: usize) -> RawResult>> { + self.get_with(index, elem::RawBson::as_datetime) + } + + /// Gets a reference to the BSON regex at the given index or returns an error if the + /// value at that index isn't a regex. + pub fn get_regex(&self, index: usize) -> RawResult>> { + self.get_with(index, elem::RawBson::as_regex) + } + + /// Gets a reference to the BSON timestamp at the given index or returns an error if the + /// value at that index isn't a timestamp. + pub fn get_timestamp(&self, index: usize) -> RawResult>> { + self.get_with(index, elem::RawBson::as_timestamp) + } + + /// Gets the BSON int32 at the given index or returns an error if the value at that index isn't + /// a 32-bit integer. 
+ pub fn get_i32(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_i32) + } + + /// Gets BSON int64 at the given index or returns an error if the value at that index isn't a + /// 64-bit integer. + pub fn get_i64(&self, index: usize) -> RawResult> { + self.get_with(index, elem::RawBson::as_i64) + } + + /// Gets a reference to the raw bytes of the RawArray. + pub fn as_bytes(&self) -> &[u8] { + self.doc.as_bytes() + } +} + +impl TryFrom<&RawArray> for Vec { + type Error = Error; + + fn try_from(arr: &RawArray) -> RawResult> { + arr.into_iter() + .map(|result| { + let rawbson = result?; + Bson::try_from(rawbson) + }) + .collect() + } +} + +impl<'a> IntoIterator for &'a RawArray { + type IntoIter = RawArrayIter<'a>; + type Item = RawResult>; + + fn into_iter(self) -> RawArrayIter<'a> { + RawArrayIter { + inner: self.doc.into_iter(), + } + } +} + +/// An iterator over borrwed raw BSON array values. +pub struct RawArrayIter<'a> { + inner: RawDocumentIter<'a>, +} + +impl<'a> Iterator for RawArrayIter<'a> { + type Item = RawResult>; + + fn next(&mut self) -> Option>> { + match self.inner.next() { + Some(Ok((_, v))) => Some(Ok(v)), + Some(Err(e)) => Some(Err(e)), + None => None, + } + } +} diff --git a/src/raw/doc.rs b/src/raw/doc.rs new file mode 100644 index 00000000..4565d541 --- /dev/null +++ b/src/raw/doc.rs @@ -0,0 +1,814 @@ +use std::{ + borrow::Borrow, + convert::{TryFrom, TryInto}, + ops::Deref, +}; + +use chrono::{DateTime, Utc}; + +use super::{i32_from_slice, Error, RawArray, RawBinary, RawBson, RawRegex, RawTimestamp, Result}; +#[cfg(feature = "decimal128")] +use crate::decimal128::Decimal128; +use crate::{oid::ObjectId, spec::ElementType, Bson, Document}; + +/// A BSON document, stored as raw bytes on the heap. This can be created from a `Vec` or +/// a [`bson::Document`]. 
+/// +/// Accessing elements within a `RawDocument` is similar to element access in [bson::Document], but +/// because the contents are parsed during iteration, instead of at creation time, format errors can +/// happen at any time during use. +/// +/// Iterating over a RawDocument yields either an error or a key-value pair that borrows from the +/// original document without making any additional allocations. +/// +/// ``` +/// # use bson::raw::{RawDocument, RawError}; +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// let mut iter = doc.iter(); +/// let (key, value) = iter.next().unwrap()?; +/// assert_eq!(key, "hi"); +/// assert_eq!(value.as_str(), Ok("y'all")); +/// assert!(iter.next().is_none()); +/// # Ok::<(), RawError>(()) +/// ``` +/// +/// Individual elements can be accessed using [`RawDocument::get`](RawDocument::get) or any of the +/// type-specific getters, such as [`RawDocument::get_object_id`](RawDocument::get_object_id) or +/// [`RawDocument::get_str`](RawDocument::get_str). Note that accessing elements is an O(N) +/// operation, as it requires iterating through the document from the beginning to find the +/// requested key. +/// +/// ``` +/// # use bson::raw::{RawDocument, RawError}; +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// assert_eq!(doc.get_str("hi")?, Some("y'all")); +/// # Ok::<(), RawError>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct RawDocument { + data: Box<[u8]>, +} + +impl RawDocument { + /// Constructs a new RawDocument, validating _only_ the + /// following invariants: + /// * `data` is at least five bytes long (the minimum for a valid BSON document) + /// * the initial four bytes of `data` accurately represent the length of the bytes as + /// required by the BSON spec. 
+ /// * the last byte of `data` is a 0 + /// + /// Note that the internal structure of the bytes representing the + /// BSON elements is _not_ validated at all by this method. If the + /// bytes do not conform to the BSON spec, then method calls on + /// the RawDocument will return RawErrors where appropriate. + /// + /// ``` + /// # use bson::raw::{RawDocument, RawError}; + /// let doc = RawDocument::new(b"\x05\0\0\0\0".to_vec())?; + /// # Ok::<(), RawError>(()) + /// ``` + pub fn new(data: Vec) -> Result { + if data.len() < 5 { + return Err(Error::MalformedValue { + message: "document too short".into(), + }); + } + + let length = i32_from_slice(&data[..4]); + + if data.len() as i32 != length { + return Err(Error::MalformedValue { + message: "document length incorrect".into(), + }); + } + + if data[data.len() - 1] != 0 { + return Err(Error::MalformedValue { + message: "document not null-terminated".into(), + }); + } + + Ok(Self { + data: data.into_boxed_slice(), + }) + } + + /// Create a RawDocument from a Document. + /// + /// ``` + /// # use bson::raw::{RawDocument, RawError}; + /// use bson::{doc, oid::ObjectId}; + /// + /// let document = doc! { + /// "_id": ObjectId::new(), + /// "name": "Herman Melville", + /// "title": "Moby-Dick", + /// }; + /// let doc = RawDocument::from_document(&document); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn from_document(doc: &Document) -> RawDocument { + let mut data = Vec::new(); + doc.to_writer(&mut data).unwrap(); + + Self { + data: data.into_boxed_slice(), + } + } + + /// Gets an iterator over the elements in the `RawDocument`, which yields `Result<&str, + /// Element<'_>>`. + /// + /// ``` + /// # use bson::raw::{elem, RawDocument, RawError}; + /// use bson::doc; + /// + /// let doc = RawDocument::from_document(&doc! 
{ "ferris": true }); + /// + /// for element in doc.iter() { + /// let (key, value) = element?; + /// assert_eq!(key, "ferris"); + /// assert_eq!(value.as_bool()?, true); + /// } + /// # Ok::<(), RawError>(()) + /// ``` + /// + /// # Note: + /// + /// There is no owning iterator for RawDocument. If you need ownership over + /// elements that might need to allocate, you must explicitly convert + /// them to owned types yourself. + pub fn iter(&self) -> RawDocumentIter<'_> { + self.into_iter() + } + + /// Return the contained data as a `Vec` + /// + /// ``` + /// # use bson::raw::RawDocument; + /// use bson::doc; + /// + /// let doc = RawDocument::from_document(&doc!{}); + /// assert_eq!(doc.into_inner(), b"\x05\x00\x00\x00\x00".to_vec()); + /// ``` + pub fn into_inner(self) -> Vec { + self.data.to_vec() + } +} + +impl TryFrom for Document { + type Error = Error; + + fn try_from(raw: RawDocument) -> Result { + Document::try_from(raw.as_ref()) + } +} + +impl<'a> IntoIterator for &'a RawDocument { + type IntoIter = RawDocumentIter<'a>; + type Item = Result<(&'a str, RawBson<'a>)>; + + fn into_iter(self) -> RawDocumentIter<'a> { + RawDocumentIter { + doc: &self, + offset: 4, + } + } +} + +impl AsRef for RawDocument { + fn as_ref(&self) -> &RawDocumentRef { + RawDocumentRef::new_unchecked(&self.data) + } +} + +impl Borrow for RawDocument { + fn borrow(&self) -> &RawDocumentRef { + &*self + } +} + +impl ToOwned for RawDocumentRef { + type Owned = RawDocument; + + fn to_owned(&self) -> Self::Owned { + self.to_raw_document() + } +} + +/// A BSON document referencing raw bytes stored elsewhere. This can be created from a +/// [RawDocument] or any type that contains valid BSON data, and can be referenced as a `[u8]`, +/// including static binary literals, [Vec](std::vec::Vec), or arrays. 
+/// +/// Accessing elements within a `RawDocumentRef` is similar to element access in [bson::Document], +/// but because the contents are parsed during iteration, instead of at creation time, format errors +/// can happen at any time during use. +/// +/// Iterating over a RawDocumentRef yields either an error or a key-value pair that borrows from the +/// original document without making any additional allocations. + +/// ``` +/// # use bson::raw::{Doc, RawError}; +/// let doc = RawDocumentRef::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; +/// let mut iter = doc.into_iter(); +/// let (key, value) = iter.next().unwrap()?; +/// assert_eq!(key, "hi"); +/// assert_eq!(value.as_str(), Ok("y'all")); +/// assert!(iter.next().is_none()); +/// # Ok::<(), RawError>(()) +/// ``` +/// +/// Individual elements can be accessed using [`RawDocumentRef::get`](RawDocumentRef::get) or any of +/// the type-specific getters, such as +/// [`RawDocumentRef::get_object_id`](RawDocumentRef::get_object_id) or [`RawDocumentRef:: +/// get_str`](RawDocumentRef::get_str). Note that accessing elements is an O(N) operation, as it +/// requires iterating through the document from the beginning to find the requested key. +/// +/// ``` +/// # use bson::raw::{RawDocument, RawError}; +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// assert_eq!(doc.get_str("hi")?, Some("y'all")); +/// # Ok::<(), RawError>(()) +/// ``` +#[derive(Debug)] +pub struct RawDocumentRef { + data: [u8], +} + +impl RawDocumentRef { + /// Constructs a new RawDocumentRef, validating _only_ the + /// following invariants: + /// * `data` is at least five bytes long (the minimum for a valid BSON document) + /// * the initial four bytes of `data` accurately represent the length of the bytes as + /// required by the BSON spec. 
+ /// * the last byte of `data` is a 0 + /// + /// Note that the internal structure of the bytes representing the + /// BSON elements is _not_ validated at all by this method. If the + /// bytes do not conform to the BSON spec, then method calls on + /// the RawDocument will return RawErrors where appropriate. + /// + /// ``` + /// # use bson::raw::{RawDocument, RawError}; + /// let doc = RawDocumentRef::new(b"\x05\0\0\0\0")?; + /// # Ok::<(), RawError>(()) + /// ``` + pub fn new + ?Sized>(data: &D) -> Result<&RawDocumentRef> { + let data = data.as_ref(); + + if data.len() < 5 { + return Err(Error::MalformedValue { + message: "document too short".into(), + }); + } + + let length = i32_from_slice(&data[..4]); + + if data.len() as i32 != length { + return Err(Error::MalformedValue { + message: "document length incorrect".into(), + }); + } + + if data[data.len() - 1] != 0 { + return Err(Error::MalformedValue { + message: "document not null-terminated".into(), + }); + } + + Ok(RawDocumentRef::new_unchecked(data)) + } + + /// Creates a new Doc referencing the provided data slice. + fn new_unchecked + ?Sized>(data: &D) -> &RawDocumentRef { + // SAFETY: + // + // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is + // null, dangling, or misaligned. We know the pointer is not null or dangling due to the + // fact that it's created by a safe reference. Converting &[u8] to *const [u8] will be + // properly aligned due to them being references to the same type, and converting *const + // [u8] to *const RawDocumentRef is aligned due to the fact that the only field in a + // RawDocumentRef is a [u8], meaning the structs are represented identically at the byte + // level. + unsafe { &*(data.as_ref() as *const [u8] as *const RawDocumentRef) } + } + + /// Creates a new RawDocument with an owned copy of the BSON bytes. 
+ /// + /// ``` + /// # use bson::raw::{Doc, RawError}; + /// use bson::raw::RawDocument; + /// + /// let data = b"\x05\0\0\0\0"; + /// let doc_ref = RawDocumentRef::new(data)?; + /// let doc: RawDocument = doc_ref.to_raw_document(); + /// # Ok::<(), RawError>(()) + pub fn to_raw_document(&self) -> RawDocument { + RawDocument { + data: self.data.to_owned().into_boxed_slice(), + } + } + + /// Gets a reference to the value corresponding to the given key by iterating until the key is + /// found. + /// + /// ``` + /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// # + /// use bson::{doc, oid::ObjectId}; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "_id": ObjectId::new(), + /// "f64": 2.5, + /// }); + /// + /// let element = doc.get("f64")?.expect("finding key f64"); + /// assert_eq!(element.as_f64(), Ok(2.5)); + /// assert!(doc.get("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get<'a>(&'a self, key: &str) -> Result>> { + for result in self.into_iter() { + let (k, v) = result?; + if key == k { + return Ok(Some(v)); + } + } + Ok(None) + } + + fn get_with<'a, T>( + &'a self, + key: &str, + f: impl FnOnce(RawBson<'a>) -> Result, + ) -> Result> { + self.get(key)?.map(f).transpose() + } + + /// Gets a reference to the BSON double value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a double. + /// + /// ``` + /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// use bson::doc; + /// + /// let doc = RawDocument::from_document(&doc! 
{ + /// "bool": true, + /// "f64": 2.5, + /// }); + /// + /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); + /// assert_eq!(doc.get_f64("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_f64("unknown"), Ok(None)); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_f64(&self, key: &str) -> Result> { + self.get_with(key, RawBson::as_f64) + } + + /// Gets a reference to the string value corresponding to a given key or returns an error if the + /// key corresponds to a value which isn't a string. + /// + /// ``` + /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// use bson::doc; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "string": "hello", + /// "bool": true, + /// }); + /// + /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); + /// assert_eq!(doc.get_str("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_str("unknown"), Ok(None)); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_str<'a>(&'a self, key: &str) -> Result> { + self.get_with(key, RawBson::as_str) + } + + /// Gets a reference to the document value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a document. + /// + /// ``` + /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// use bson::doc; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "doc": { "key": "value"}, + /// "bool": true, + /// }); + /// + /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); + /// assert_eq!(doc.get_document("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_document("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_document<'a>(&'a self, key: &str) -> Result> { + self.get_with(key, RawBson::as_document) + } + + /// Gets a reference to the array value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't an array. 
+ /// + /// ``` + /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// use bson::doc; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "array": [true, 3], + /// "bool": true, + /// }); + /// + /// let mut arr_iter = docbuf.get_array("array")?.expect("finding key array").into_iter(); + /// let _: bool = arriter.next().unwrap()?.as_bool()?; + /// let _: i32 = arriter.next().unwrap()?.as_i32()?; + /// + /// assert!(arr_iter.next().is_none()); + /// assert!(doc.get_array("bool").is_err()); + /// assert!(doc.get_array("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_array<'a>(&'a self, key: &str) -> Result> { + self.get_with(key, RawBson::as_array) + } + + /// Gets a reference to the BSON binary value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a binary value. + /// + /// ``` + /// # use bson::raw::{RawDocument, elem, RawError}; + /// + /// use bson::{ + /// spec::BinarySubtype + /// doc, Binary, + /// }; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, + /// "bool": true, + /// }); + /// + /// assert_eq!(doc.get_binary("binary")?.map(elem::RawBsonBinary::as_bytes), Some(&[1, 2, 3][..])); + /// assert_eq!(doc.get_binary("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_binary("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_binary<'a>(&'a self, key: &str) -> Result>> { + self.get_with(key, RawBson::as_binary) + } + + /// Gets a reference to the ObjectId value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't an ObjectId. + /// + /// ``` + /// # use bson::raw::{RawDocument, RawError}; + /// use bson::{doc, oid::ObjectId}; + /// + /// let doc = RawDocument::from_document(&doc! 
{ + /// "_id": ObjectId::new(), + /// "bool": true, + /// }); + /// + /// let oid = doc.get_object_id("_id")?.unwrap(); + /// assert_eq!(doc.get_object_id("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_object_id("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_object_id(&self, key: &str) -> Result> { + self.get_with(key, RawBson::as_object_id) + } + + /// Gets a reference to the boolean value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a boolean. + /// + /// ``` + /// # use bson::raw::{RawDocument, RawError}; + /// use bson::{doc, oid::ObjectId}; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "_id": ObjectId::new(), + /// "bool": true, + /// }); + /// + /// assert!(doc.get_bool("bool")?.unwrap()); + /// assert_eq!(doc.get_bool("_id").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_object_id("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_bool(&self, key: &str) -> Result> { + self.get_with(key, RawBson::as_bool) + } + + /// Gets a reference to the BSON DateTime value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a DateTime. + /// + /// ``` + /// # use bson::raw::{RawDocument, RawError}; + /// use bson::doc; + /// use chrono::{Utc, Datelike, TimeZone}; + /// + /// let doc = RawDocument::from_document(&doc! 
{ + /// "created_at": Utc.ymd(2020, 3, 15).and_hms(17, 0, 0), + /// "bool": true, + /// }); + /// assert_eq!(doc.get_datetime("created_at")?.unwrap().year(), 2020); + /// assert_eq!(doc.get_datetime("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_datetime("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_datetime(&self, key: &str) -> Result>> { + self.get_with(key, RawBson::as_datetime) + } + /// Gets a reference to the BSON regex value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a regex. + /// + /// ``` + /// # use bson::raw::{RawDocument, RawError, elem}; + /// use bson::{doc, Regex}; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "regex": Regex { + /// pattern: r"end\s*$".into(), + /// options: "i".into(), + /// }, + /// "bool": true, + /// }); + /// + /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); + /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); + /// assert_eq!(doc.get_regex("bool").unwrap_err(), RawError::UnexpectedType); + /// assert!(doc.get_regex("unknown")?.is_none()); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_regex<'a>(&'a self, key: &str) -> Result>> { + self.get_with(key, RawBson::as_regex) + } + + /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an + /// error if the key corresponds to a value which isn't a timestamp. + /// + /// ``` + /// # use bson::raw::{RawDocument, elem, RawError}; + /// use bson::{doc, Timestamp}; + /// + /// let doc = RawDocument::from_document(&doc! 
{ + /// "bool": true, + /// "ts": Timestamp { time: 649876543, increment: 9 }, + /// }); + /// + /// let timestamp = doc.get_timestamp("ts")?.unwrap(); + /// + /// assert_eq!(timestamp.time(), 649876543); + /// assert_eq!(timestamp.increment(), 9); + /// assert_eq!(doc.get_timestamp("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_timestamp<'a>(&'a self, key: &str) -> Result>> { + self.get_with(key, RawBson::as_timestamp) + } + + /// Gets a reference to the BSON int32 value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a 32-bit integer. + /// + /// ``` + /// # use bson::raw::{RawDocument, RawError}; + /// use bson::doc; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "bool": true, + /// "i32": 1_000_000, + /// }); + /// + /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); + /// assert_eq!(doc.get_i32("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_i32("unknown"), Ok(None)); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_i32(&self, key: &str) -> Result> { + self.get_with(key, RawBson::as_i32) + } + + /// Gets a reference to the BSON int64 value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a 64-bit integer. + /// + /// ``` + /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// use bson::doc; + /// + /// let doc = RawDocument::from_document(&doc! 
{ + /// "bool": true, + /// "i64": 9223372036854775807_i64, + /// }); + /// + /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); + /// assert_eq!(doc.get_i64("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_i64("unknown"), Ok(None)); + /// # Ok::<(), RawError>(()) + /// ``` + pub fn get_i64(&self, key: &str) -> Result> { + self.get_with(key, RawBson::as_i64) + } + + /// Return a reference to the contained data as a `&[u8]` + /// + /// ``` + /// # use bson::raw::RawDocument; + /// use bson::doc; + /// let docbuf = RawDocument::from_document(&doc!{}); + /// assert_eq!(docbuf.as_bytes(), b"\x05\x00\x00\x00\x00"); + /// ``` + pub fn as_bytes(&self) -> &[u8] { + &self.data + } +} + +impl AsRef for RawDocumentRef { + fn as_ref(&self) -> &RawDocumentRef { + self + } +} + +impl Deref for RawDocument { + type Target = RawDocumentRef; + + fn deref(&self) -> &Self::Target { + RawDocumentRef::new_unchecked(&self.data) + } +} + +impl TryFrom<&RawDocumentRef> for crate::Document { + type Error = Error; + + fn try_from(rawdoc: &RawDocumentRef) -> Result { + rawdoc + .into_iter() + .map(|res| res.and_then(|(k, v)| Ok((k.to_owned(), v.try_into()?)))) + .collect() + } +} + +impl<'a> IntoIterator for &'a RawDocumentRef { + type IntoIter = RawDocumentIter<'a>; + type Item = RawResult<(&'a str, RawBson<'a>)>; + + fn into_iter(self) -> RawDocumentIter<'a> { + RawDocumentIter { + doc: self, + offset: 4, + } + } +} + +pub struct RawDocumentIter<'a> { + doc: &'a RawDocumentRef, + offset: usize, +} + +impl<'a> Iterator for RawDocumentIter<'a> { + type Item = RawResult<(&'a str, elem::RawBson<'a>)>; + + fn next(&mut self) -> Option)>> { + if self.offset == self.doc.data.len() - 1 { + if self.doc.data[self.offset] == 0 { + // end of document marker + return None; + } else { + return Some(Err(RawError::MalformedValue { + message: "document not null terminated".into(), + })); + } + } + + let key = match read_nullterminated(&self.doc.data[self.offset + 1..]) { + 
Ok(key) => key, + Err(err) => return Some(Err(err)), + }; + + let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 + + let element_type = match ElementType::from(self.doc.data[self.offset]) { + Some(et) => et, + None => { + return Some(Err(RawError::MalformedValue { + message: format!("invalid tag: {}", self.doc.data[self.offset]), + })) + } + }; + + let element_size = match element_type { + ElementType::Double => 8, + ElementType::String => { + let size = + 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + + if self.doc.data[valueoffset + size - 1] != 0 { + return Some(Err(RawError::MalformedValue { + message: "string not null terminated".into(), + })); + } + + size + } + ElementType::EmbeddedDocument => { + let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + + if self.doc.data[valueoffset + size - 1] != 0 { + return Some(Err(RawError::MalformedValue { + message: "document not null terminated".into(), + })); + } + + size + } + ElementType::Array => { + let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + + if self.doc.data[valueoffset + size - 1] != 0 { + return Some(Err(RawError::MalformedValue { + message: "array not null terminated".into(), + })); + } + + size + } + ElementType::Binary => { + 5 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize + } + ElementType::Undefined => 0, + ElementType::ObjectId => 12, + ElementType::Boolean => 1, + ElementType::DateTime => 8, + ElementType::Null => 0, + ElementType::RegularExpression => { + let regex = match read_nullterminated(&self.doc.data[valueoffset..]) { + Ok(regex) => regex, + Err(err) => return Some(Err(err)), + }; + + let options = + match read_nullterminated(&self.doc.data[valueoffset + regex.len() + 1..]) { + Ok(options) => options, + Err(err) => return Some(Err(err)), + }; + + regex.len() + options.len() + 2 + } + ElementType::DbPointer => { + let string_size = + 4 + 
i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + + let id_size = 12; + + if self.doc.data[valueoffset + string_size - 1] != 0 { + return Some(Err(RawError::MalformedValue { + message: "DBPointer string not null-terminated".into(), + })); + } + + string_size + id_size + } + ElementType::JavaScriptCode => { + let size = + 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + + if self.doc.data[valueoffset + size - 1] != 0 { + return Some(Err(RawError::MalformedValue { + message: "javascript code not null-terminated".into(), + })); + } + + size + } + ElementType::Symbol => { + 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize + } + ElementType::JavaScriptCodeWithScope => { + let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + + if self.doc.data[valueoffset + size - 1] != 0 { + return Some(Err(RawError::MalformedValue { + message: "javascript with scope not null-terminated".into(), + })); + } + + size + } + ElementType::Int32 => 4, + ElementType::Timestamp => 8, + ElementType::Int64 => 8, + ElementType::Decimal128 => 16, + ElementType::MaxKey => 0, + ElementType::MinKey => 0, + }; + + let nextoffset = valueoffset + element_size; + self.offset = nextoffset; + + Some(Ok(( + key, + elem::RawBson::new(element_type, &self.doc.data[valueoffset..nextoffset]), + ))) + } +} diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 0a5d1d95..549d979e 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -13,9 +13,9 @@ use super::{ read_lenencoded, read_nullterminated, u32_from_slice, + Error, RawArray, RawDocumentRef, - RawError, RawResult, }; use crate::{ @@ -50,12 +50,12 @@ impl<'a> RawBson<'a> { pub fn as_f64(self) -> RawResult { if let ElementType::Double = self.element_type { Ok(f64::from_bits(u64::from_le_bytes( - self.data.try_into().map_err(|_| RawError::MalformedValue { + self.data.try_into().map_err(|_| Error::MalformedValue { message: "f64 should be 8 bytes long".into(), 
})?, ))) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -64,7 +64,7 @@ impl<'a> RawBson<'a> { if let ElementType::String = self.element_type { read_lenencoded(self.data) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -73,7 +73,7 @@ impl<'a> RawBson<'a> { if let ElementType::EmbeddedDocument = self.element_type { RawDocumentRef::new(self.data) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -82,7 +82,7 @@ impl<'a> RawBson<'a> { if let ElementType::Array = self.element_type { RawArray::new(self.data) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -92,20 +92,20 @@ impl<'a> RawBson<'a> { let length = i32_from_slice(&self.data[0..4]); let subtype = BinarySubtype::from(self.data[4]); // TODO: This mishandles reserved values if self.data.len() as i32 != length + 5 { - return Err(RawError::MalformedValue { + return Err(Error::MalformedValue { message: "binary bson has wrong declared length".into(), }); } let data = match subtype { BinarySubtype::BinaryOld => { if length < 4 { - return Err(RawError::MalformedValue { + return Err(Error::MalformedValue { message: "old binary subtype has no inner declared length".into(), }); } let oldlength = i32_from_slice(&self.data[5..9]); if oldlength + 4 != length { - return Err(RawError::MalformedValue { + return Err(Error::MalformedValue { message: "old binary subtype has wrong inner declared length".into(), }); } @@ -115,7 +115,7 @@ impl<'a> RawBson<'a> { }; Ok(RawBinary::new(subtype, data)) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -123,12 +123,12 @@ impl<'a> RawBson<'a> { pub fn as_object_id(self) -> RawResult { if let ElementType::ObjectId = self.element_type { Ok(ObjectId::with_bytes(self.data.try_into().map_err( - |_| RawError::MalformedValue { + |_| Error::MalformedValue { message: "object id should be 12 bytes long".into(), }, )?)) } else { - Err(RawError::UnexpectedType) + 
Err(Error::UnexpectedType) } } @@ -136,20 +136,20 @@ impl<'a> RawBson<'a> { pub fn as_bool(self) -> RawResult { if let ElementType::Boolean = self.element_type { if self.data.len() != 1 { - Err(RawError::MalformedValue { + Err(Error::MalformedValue { message: "boolean has length != 1".into(), }) } else { match self.data[0] { 0 => Ok(false), 1 => Ok(true), - _ => Err(RawError::MalformedValue { + _ => Err(Error::MalformedValue { message: "boolean value was not 0 or 1".into(), }), } } } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -175,7 +175,7 @@ impl<'a> RawBson<'a> { Ok(Utc.timestamp(secs, nanos)) } } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -184,7 +184,7 @@ impl<'a> RawBson<'a> { if let ElementType::RegularExpression = self.element_type { RawRegex::new(self.data) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -194,7 +194,7 @@ impl<'a> RawBson<'a> { if let ElementType::JavaScriptCode = self.element_type { read_lenencoded(self.data) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -203,7 +203,7 @@ impl<'a> RawBson<'a> { if let ElementType::Symbol = self.element_type { read_lenencoded(self.data) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -219,7 +219,7 @@ impl<'a> RawBson<'a> { Ok(RawJavaScriptCodeWithScope { code, scope }) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -230,7 +230,7 @@ impl<'a> RawBson<'a> { assert_eq!(self.data.len(), 8); Ok(RawTimestamp { data: self.data }) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -240,7 +240,7 @@ impl<'a> RawBson<'a> { assert_eq!(self.data.len(), 4); Ok(i32_from_slice(self.data)) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) } } @@ -250,7 +250,7 @@ impl<'a> RawBson<'a> { assert_eq!(self.data.len(), 8); Ok(i64_from_slice(self.data)) } else { - Err(RawError::UnexpectedType) + 
Err(Error::UnexpectedType) } } @@ -267,7 +267,7 @@ impl<'a> RawBson<'a> { } impl<'a> TryFrom> for Bson { - type Error = RawError; + type Error = Error; fn try_from(rawbson: RawBson<'a>) -> RawResult { Ok(match rawbson.element_type { @@ -329,7 +329,7 @@ impl<'a> TryFrom> for Bson { ElementType::Decimal128 => Bson::Decimal128(rawbson.as_decimal128()?), #[cfg(not(feature = "decimal128"))] - ElementType::Decimal128 => return Err(RawError::UnexpectedType), + ElementType::Decimal128 => return Err(Error::UnexpectedType), ElementType::MaxKey => unimplemented!(), ElementType::MinKey => unimplemented!(), }) @@ -376,7 +376,7 @@ impl<'a> RawRegex<'a> { options: opts, }) } else { - Err(RawError::MalformedValue { + Err(Error::MalformedValue { message: "expected two null-terminated strings".into(), }) } diff --git a/src/raw/error.rs b/src/raw/error.rs new file mode 100644 index 00000000..548bee5e --- /dev/null +++ b/src/raw/error.rs @@ -0,0 +1,27 @@ +/// An error that occurs when attempting to parse raw BSON bytes. +#[derive(Debug, PartialEq)] +pub enum Error { + /// A BSON value did not fit the expected type. + UnexpectedType, + + /// A BSON value did not fit the proper format. + MalformedValue { message: String }, + + /// Improper UTF-8 bytes were found when proper UTF-8 was expected. The error value contains + /// the malformed data as bytes. + Utf8EncodingError(Vec), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::UnexpectedType => write!(f, "unexpected type"), + Self::MalformedValue { message } => write!(f, "malformed value: {:?}", message), + Self::Utf8EncodingError(_) => write!(f, "utf-8 encoding error"), + } + } +} + +impl std::error::Error for Error {} + +pub type Result = Result; diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 506e4216..5f37dc26 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1,7 +1,7 @@ //! 
A RawDocument can be created from a `Vec` containing raw BSON data, and elements //! accessed via methods similar to those available on the Document type. Note that rawbson returns -//! a RawResult>, since the bytes contained in the document are not fully validated until -//! trying to access the contained data. +//! a raw::Result>, since the bytes contained in the document are not fully validated +//! until trying to access the contained data. //! //! ```rust //! use bson::raw::{ @@ -102,1007 +102,21 @@ //! # Ok::<(), bson::raw::RawError>(()) //! ``` +mod array; +mod doc; mod elem; +mod error; #[cfg(test)] mod props; +#[cfg(test)] +mod test; -use std::{ - borrow::Borrow, - convert::{TryFrom, TryInto}, - ops::Deref, +pub use self::{ + array::{RawArray, RawArrayIter}, + doc::{RawDocument, RawDocumentIter, RawDocumentRef}, + elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex, RawTimestamp}, + error::{Error, Result}, }; - -use chrono::{DateTime, Utc}; - -#[cfg(feature = "decimal128")] -use crate::decimal128::Decimal128; -use crate::{oid::ObjectId, spec::ElementType, Bson, Document}; -pub use elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex, RawTimestamp}; - -/// An error that occurs when attempting to parse raw BSON bytes. -#[derive(Debug, PartialEq)] -pub enum RawError { - /// A BSON value did not fit the expected type. - UnexpectedType, - - /// A BSON value did not fit the proper format. - MalformedValue { message: String }, - - /// Improper UTF-8 bytes were found when proper UTF-8 was expected. The error value contains - /// the malformed data as bytes. 
- Utf8EncodingError(Vec), -} - -impl std::fmt::Display for RawError { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - Self::UnexpectedType => write!(f, "unexpected type"), - Self::MalformedValue { message } => write!(f, "malformed value: {:?}", message), - Self::Utf8EncodingError(_) => write!(f, "utf-8 encoding error"), - } - } -} - -impl std::error::Error for RawError {} - -pub type RawResult = Result; - -/// A BSON document, stored as raw bytes on the heap. This can be created from a `Vec` or -/// a [`bson::Document`]. -/// -/// Accessing elements within a `RawDocument` is similar to element access in [bson::Document], but -/// because the contents are parsed during iteration, instead of at creation time, format errors can -/// happen at any time during use. -/// -/// Iterating over a RawDocument yields either an error or a key-value pair that borrows from the -/// original document without making any additional allocations. -/// -/// ``` -/// # use bson::raw::{RawDocument, RawError}; -/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -/// let mut iter = doc.iter(); -/// let (key, value) = iter.next().unwrap()?; -/// assert_eq!(key, "hi"); -/// assert_eq!(value.as_str(), Ok("y'all")); -/// assert!(iter.next().is_none()); -/// # Ok::<(), RawError>(()) -/// ``` -/// -/// Individual elements can be accessed using [`RawDocument::get`](RawDocument::get) or any of the -/// type-specific getters, such as [`RawDocument::get_object_id`](RawDocument::get_object_id) or -/// [`RawDocument::get_str`](RawDocument::get_str). Note that accessing elements is an O(N) -/// operation, as it requires iterating through the document from the beginning to find the -/// requested key. 
-/// -/// ``` -/// # use bson::raw::{RawDocument, RawError}; -/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -/// assert_eq!(doc.get_str("hi")?, Some("y'all")); -/// # Ok::<(), RawError>(()) -/// ``` -#[derive(Clone, Debug)] -pub struct RawDocument { - data: Box<[u8]>, -} - -impl RawDocument { - /// Constructs a new RawDocument, validating _only_ the - /// following invariants: - /// * `data` is at least five bytes long (the minimum for a valid BSON document) - /// * the initial four bytes of `data` accurately represent the length of the bytes as - /// required by the BSON spec. - /// * the last byte of `data` is a 0 - /// - /// Note that the internal structure of the bytes representing the - /// BSON elements is _not_ validated at all by this method. If the - /// bytes do not conform to the BSON spec, then method calls on - /// the RawDocument will return RawErrors where appropriate. - /// - /// ``` - /// # use bson::raw::{RawDocument, RawError}; - /// let doc = RawDocument::new(b"\x05\0\0\0\0".to_vec())?; - /// # Ok::<(), RawError>(()) - /// ``` - pub fn new(data: Vec) -> RawResult { - if data.len() < 5 { - return Err(RawError::MalformedValue { - message: "document too short".into(), - }); - } - - let length = i32_from_slice(&data[..4]); - - if data.len() as i32 != length { - return Err(RawError::MalformedValue { - message: "document length incorrect".into(), - }); - } - - if data[data.len() - 1] != 0 { - return Err(RawError::MalformedValue { - message: "document not null-terminated".into(), - }); - } - - Ok(Self { - data: data.into_boxed_slice(), - }) - } - - /// Create a RawDocument from a Document. - /// - /// ``` - /// # use bson::raw::{RawDocument, RawError}; - /// use bson::{doc, oid::ObjectId}; - /// - /// let document = doc! 
{ - /// "_id": ObjectId::new(), - /// "name": "Herman Melville", - /// "title": "Moby-Dick", - /// }; - /// let doc = RawDocument::from_document(&document); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn from_document(doc: &Document) -> RawDocument { - let mut data = Vec::new(); - doc.to_writer(&mut data).unwrap(); - - Self { - data: data.into_boxed_slice(), - } - } - - /// Gets an iterator over the elements in the `RawDocument`, which yields `Result<&str, - /// Element<'_>>`. - /// - /// ``` - /// # use bson::raw::{elem, RawDocument, RawError}; - /// use bson::doc; - /// - /// let doc = RawDocument::from_document(&doc! { "ferris": true }); - /// - /// for element in doc.iter() { - /// let (key, value) = element?; - /// assert_eq!(key, "ferris"); - /// assert_eq!(value.as_bool()?, true); - /// } - /// # Ok::<(), RawError>(()) - /// ``` - /// - /// # Note: - /// - /// There is no owning iterator for RawDocument. If you need ownership over - /// elements that might need to allocate, you must explicitly convert - /// them to owned types yourself. 
- pub fn iter(&self) -> RawDocumentIter<'_> { - self.into_iter() - } - - /// Return the contained data as a `Vec` - /// - /// ``` - /// # use bson::raw::RawDocument; - /// use bson::doc; - /// - /// let doc = RawDocument::from_document(&doc!{}); - /// assert_eq!(doc.into_inner(), b"\x05\x00\x00\x00\x00".to_vec()); - /// ``` - pub fn into_inner(self) -> Vec { - self.data.to_vec() - } -} - -impl TryFrom for Document { - type Error = RawError; - - fn try_from(raw: RawDocument) -> RawResult { - Document::try_from(raw.as_ref()) - } -} - -impl<'a> IntoIterator for &'a RawDocument { - type IntoIter = RawDocumentIter<'a>; - type Item = RawResult<(&'a str, RawBson<'a>)>; - - fn into_iter(self) -> RawDocumentIter<'a> { - RawDocumentIter { - doc: &self, - offset: 4, - } - } -} - -impl AsRef for RawDocument { - fn as_ref(&self) -> &RawDocumentRef { - RawDocumentRef::new_unchecked(&self.data) - } -} - -impl Borrow for RawDocument { - fn borrow(&self) -> &RawDocumentRef { - &*self - } -} - -impl ToOwned for RawDocumentRef { - type Owned = RawDocument; - - fn to_owned(&self) -> Self::Owned { - self.to_raw_document() - } -} - -/// A BSON document referencing raw bytes stored elsewhere. This can be created from a -/// [RawDocument] or any type that contains valid BSON data, and can be referenced as a `[u8]`, -/// including static binary literals, [Vec](std::vec::Vec), or arrays. -/// -/// Accessing elements within a `RawDocumentRef` is similar to element access in [bson::Document], -/// but because the contents are parsed during iteration, instead of at creation time, format errors -/// can happen at any time during use. -/// -/// Iterating over a RawDocumentRef yields either an error or a key-value pair that borrows from the -/// original document without making any additional allocations. 
- -/// ``` -/// # use bson::raw::{Doc, RawError}; -/// let doc = RawDocumentRef::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; -/// let mut iter = doc.into_iter(); -/// let (key, value) = iter.next().unwrap()?; -/// assert_eq!(key, "hi"); -/// assert_eq!(value.as_str(), Ok("y'all")); -/// assert!(iter.next().is_none()); -/// # Ok::<(), RawError>(()) -/// ``` -/// -/// Individual elements can be accessed using [`RawDocumentRef::get`](RawDocumentRef::get) or any of -/// the type-specific getters, such as -/// [`RawDocumentRef::get_object_id`](RawDocumentRef::get_object_id) or [`RawDocumentRef:: -/// get_str`](RawDocumentRef::get_str). Note that accessing elements is an O(N) operation, as it -/// requires iterating through the document from the beginning to find the requested key. -/// -/// ``` -/// # use bson::raw::{RawDocument, RawError}; -/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -/// assert_eq!(doc.get_str("hi")?, Some("y'all")); -/// # Ok::<(), RawError>(()) -/// ``` -#[derive(Debug)] -pub struct RawDocumentRef { - data: [u8], -} - -impl RawDocumentRef { - /// Constructs a new RawDocumentRef, validating _only_ the - /// following invariants: - /// * `data` is at least five bytes long (the minimum for a valid BSON document) - /// * the initial four bytes of `data` accurately represent the length of the bytes as - /// required by the BSON spec. - /// * the last byte of `data` is a 0 - /// - /// Note that the internal structure of the bytes representing the - /// BSON elements is _not_ validated at all by this method. If the - /// bytes do not conform to the BSON spec, then method calls on - /// the RawDocument will return RawErrors where appropriate. 
- /// - /// ``` - /// # use bson::raw::{RawDocument, RawError}; - /// let doc = RawDocumentRef::new(b"\x05\0\0\0\0")?; - /// # Ok::<(), RawError>(()) - /// ``` - pub fn new + ?Sized>(data: &D) -> RawResult<&RawDocumentRef> { - let data = data.as_ref(); - - if data.len() < 5 { - return Err(RawError::MalformedValue { - message: "document too short".into(), - }); - } - - let length = i32_from_slice(&data[..4]); - - if data.len() as i32 != length { - return Err(RawError::MalformedValue { - message: "document length incorrect".into(), - }); - } - - if data[data.len() - 1] != 0 { - return Err(RawError::MalformedValue { - message: "document not null-terminated".into(), - }); - } - - Ok(RawDocumentRef::new_unchecked(data)) - } - - /// Creates a new Doc referencing the provided data slice. - fn new_unchecked + ?Sized>(data: &D) -> &RawDocumentRef { - // SAFETY: - // - // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is - // null, dangling, or misaligned. We know the pointer is not null or dangling due to the - // fact that it's created by a safe reference. Converting &[u8] to *const [u8] will be - // properly aligned due to them being references to the same type, and converting *const - // [u8] to *const RawDocumentRef is aligned due to the fact that the only field in a - // RawDocumentRef is a [u8], meaning the structs are represented identically at the byte - // level. - unsafe { &*(data.as_ref() as *const [u8] as *const RawDocumentRef) } - } - - /// Creates a new RawDocument with an owned copy of the BSON bytes. 
- /// - /// ``` - /// # use bson::raw::{Doc, RawError}; - /// use bson::raw::RawDocument; - /// - /// let data = b"\x05\0\0\0\0"; - /// let doc_ref = RawDocumentRef::new(data)?; - /// let doc: RawDocument = doc_ref.to_raw_document(); - /// # Ok::<(), RawError>(()) - pub fn to_raw_document(&self) -> RawDocument { - RawDocument { - data: self.data.to_owned().into_boxed_slice(), - } - } - - /// Gets a reference to the value corresponding to the given key by iterating until the key is - /// found. - /// - /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; - /// # - /// use bson::{doc, oid::ObjectId}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "_id": ObjectId::new(), - /// "f64": 2.5, - /// }); - /// - /// let element = doc.get("f64")?.expect("finding key f64"); - /// assert_eq!(element.as_f64(), Ok(2.5)); - /// assert!(doc.get("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get<'a>(&'a self, key: &str) -> RawResult>> { - for result in self.into_iter() { - let (k, v) = result?; - if key == k { - return Ok(Some(v)); - } - } - Ok(None) - } - - fn get_with<'a, T>( - &'a self, - key: &str, - f: impl FnOnce(elem::RawBson<'a>) -> RawResult, - ) -> RawResult> { - self.get(key)?.map(f).transpose() - } - - /// Gets a reference to the BSON double value corresponding to a given key or returns an error - /// if the key corresponds to a value which isn't a double. - /// - /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; - /// use bson::doc; - /// - /// let doc = RawDocument::from_document(&doc! 
{ - /// "bool": true, - /// "f64": 2.5, - /// }); - /// - /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); - /// assert_eq!(doc.get_f64("bool"), Err(RawError::UnexpectedType)); - /// assert_eq!(doc.get_f64("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_f64(&self, key: &str) -> RawResult> { - self.get_with(key, elem::RawBson::as_f64) - } - - /// Gets a reference to the string value corresponding to a given key or returns an error if the - /// key corresponds to a value which isn't a string. - /// - /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; - /// use bson::doc; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "string": "hello", - /// "bool": true, - /// }); - /// - /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); - /// assert_eq!(doc.get_str("bool"), Err(RawError::UnexpectedType)); - /// assert_eq!(doc.get_str("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_str<'a>(&'a self, key: &str) -> RawResult> { - self.get_with(key, elem::RawBson::as_str) - } - - /// Gets a reference to the document value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a document. - /// - /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; - /// use bson::doc; - /// - /// let doc = RawDocument::from_document(&doc! 
{ - /// "doc": { "key": "value"}, - /// "bool": true, - /// }); - /// - /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); - /// assert_eq!(doc.get_document("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(doc.get_document("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_document<'a>(&'a self, key: &str) -> RawResult> { - self.get_with(key, elem::RawBson::as_document) - } - - /// Gets a reference to the array value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't an array. - /// - /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; - /// use bson::doc; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "array": [true, 3], - /// "bool": true, - /// }); - /// - /// let mut arr_iter = docbuf.get_array("array")?.expect("finding key array").into_iter(); - /// let _: bool = arriter.next().unwrap()?.as_bool()?; - /// let _: i32 = arriter.next().unwrap()?.as_i32()?; - /// - /// assert!(arr_iter.next().is_none()); - /// assert!(doc.get_array("bool").is_err()); - /// assert!(doc.get_array("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_array<'a>(&'a self, key: &str) -> RawResult> { - self.get_with(key, elem::RawBson::as_array) - } - - /// Gets a reference to the BSON binary value corresponding to a given key or returns an error - /// if the key corresponds to a value which isn't a binary value. - /// - /// ``` - /// # use bson::raw::{RawDocument, elem, RawError}; - /// - /// use bson::{ - /// spec::BinarySubtype - /// doc, Binary, - /// }; - /// - /// let doc = RawDocument::from_document(&doc! 
{ - /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, - /// "bool": true, - /// }); - /// - /// assert_eq!(doc.get_binary("binary")?.map(elem::RawBsonBinary::as_bytes), Some(&[1, 2, 3][..])); - /// assert_eq!(doc.get_binary("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(doc.get_binary("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_binary<'a>(&'a self, key: &str) -> RawResult>> { - self.get_with(key, elem::RawBson::as_binary) - } - - /// Gets a reference to the ObjectId value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't an ObjectId. - /// - /// ``` - /// # use bson::raw::{RawDocument, RawError}; - /// use bson::{doc, oid::ObjectId}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "_id": ObjectId::new(), - /// "bool": true, - /// }); - /// - /// let oid = doc.get_object_id("_id")?.unwrap(); - /// assert_eq!(doc.get_object_id("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(doc.get_object_id("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_object_id(&self, key: &str) -> RawResult> { - self.get_with(key, elem::RawBson::as_object_id) - } - - /// Gets a reference to the boolean value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a boolean. - /// - /// ``` - /// # use bson::raw::{RawDocument, RawError}; - /// use bson::{doc, oid::ObjectId}; - /// - /// let doc = RawDocument::from_document(&doc! 
{ - /// "_id": ObjectId::new(), - /// "bool": true, - /// }); - /// - /// assert!(doc.get_bool("bool")?.unwrap()); - /// assert_eq!(doc.get_bool("_id").unwrap_err(), RawError::UnexpectedType); - /// assert!(doc.get_object_id("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_bool(&self, key: &str) -> RawResult> { - self.get_with(key, elem::RawBson::as_bool) - } - - /// Gets a reference to the BSON DateTime value corresponding to a given key or returns an error - /// if the key corresponds to a value which isn't a DateTime. - /// - /// ``` - /// # use bson::raw::{RawDocument, RawError}; - /// use bson::doc; - /// use chrono::{Utc, Datelike, TimeZone}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "created_at": Utc.ymd(2020, 3, 15).and_hms(17, 0, 0), - /// "bool": true, - /// }); - /// assert_eq!(doc.get_datetime("created_at")?.unwrap().year(), 2020); - /// assert_eq!(doc.get_datetime("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(doc.get_datetime("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_datetime(&self, key: &str) -> RawResult>> { - self.get_with(key, elem::RawBson::as_datetime) - } - /// Gets a reference to the BSON regex value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a regex. - /// - /// ``` - /// # use bson::raw::{RawDocument, RawError, elem}; - /// use bson::{doc, Regex}; - /// - /// let doc = RawDocument::from_document(&doc! 
{ - /// "regex": Regex { - /// pattern: r"end\s*$".into(), - /// options: "i".into(), - /// }, - /// "bool": true, - /// }); - /// - /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); - /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); - /// assert_eq!(doc.get_regex("bool").unwrap_err(), RawError::UnexpectedType); - /// assert!(doc.get_regex("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_regex<'a>(&'a self, key: &str) -> RawResult>> { - self.get_with(key, elem::RawBson::as_regex) - } - - /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an - /// error if the key corresponds to a value which isn't a timestamp. - /// - /// ``` - /// # use bson::raw::{RawDocument, elem, RawError}; - /// use bson::{doc, Timestamp}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "bool": true, - /// "ts": Timestamp { time: 649876543, increment: 9 }, - /// }); - /// - /// let timestamp = doc.get_timestamp("ts")?.unwrap(); - /// - /// assert_eq!(timestamp.time(), 649876543); - /// assert_eq!(timestamp.increment(), 9); - /// assert_eq!(doc.get_timestamp("bool"), Err(RawError::UnexpectedType)); - /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_timestamp<'a>(&'a self, key: &str) -> RawResult>> { - self.get_with(key, elem::RawBson::as_timestamp) - } - - /// Gets a reference to the BSON int32 value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a 32-bit integer. - /// - /// ``` - /// # use bson::raw::{RawDocument, RawError}; - /// use bson::doc; - /// - /// let doc = RawDocument::from_document(&doc! 
{ - /// "bool": true, - /// "i32": 1_000_000, - /// }); - /// - /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); - /// assert_eq!(doc.get_i32("bool"), Err(RawError::UnexpectedType)); - /// assert_eq!(doc.get_i32("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_i32(&self, key: &str) -> RawResult> { - self.get_with(key, elem::RawBson::as_i32) - } - - /// Gets a reference to the BSON int64 value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a 64-bit integer. - /// - /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; - /// use bson::doc; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "bool": true, - /// "i64": 9223372036854775807_i64, - /// }); - /// - /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); - /// assert_eq!(doc.get_i64("bool"), Err(RawError::UnexpectedType)); - /// assert_eq!(doc.get_i64("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) - /// ``` - pub fn get_i64(&self, key: &str) -> RawResult> { - self.get_with(key, elem::RawBson::as_i64) - } - - /// Return a reference to the contained data as a `&[u8]` - /// - /// ``` - /// # use bson::raw::RawDocument; - /// use bson::doc; - /// let docbuf = RawDocument::from_document(&doc!{}); - /// assert_eq!(docbuf.as_bytes(), b"\x05\x00\x00\x00\x00"); - /// ``` - pub fn as_bytes(&self) -> &[u8] { - &self.data - } -} - -impl AsRef for RawDocumentRef { - fn as_ref(&self) -> &RawDocumentRef { - self - } -} - -impl Deref for RawDocument { - type Target = RawDocumentRef; - - fn deref(&self) -> &Self::Target { - RawDocumentRef::new_unchecked(&self.data) - } -} - -impl TryFrom<&RawDocumentRef> for crate::Document { - type Error = RawError; - - fn try_from(rawdoc: &RawDocumentRef) -> RawResult { - rawdoc - .into_iter() - .map(|res| res.and_then(|(k, v)| Ok((k.to_owned(), v.try_into()?)))) - .collect() - } -} - -impl<'a> IntoIterator for &'a RawDocumentRef { - type IntoIter = 
RawDocumentIter<'a>; - type Item = RawResult<(&'a str, RawBson<'a>)>; - - fn into_iter(self) -> RawDocumentIter<'a> { - RawDocumentIter { - doc: self, - offset: 4, - } - } -} - -pub struct RawDocumentIter<'a> { - doc: &'a RawDocumentRef, - offset: usize, -} - -impl<'a> Iterator for RawDocumentIter<'a> { - type Item = RawResult<(&'a str, elem::RawBson<'a>)>; - - fn next(&mut self) -> Option)>> { - if self.offset == self.doc.data.len() - 1 { - if self.doc.data[self.offset] == 0 { - // end of document marker - return None; - } else { - return Some(Err(RawError::MalformedValue { - message: "document not null terminated".into(), - })); - } - } - - let key = match read_nullterminated(&self.doc.data[self.offset + 1..]) { - Ok(key) => key, - Err(err) => return Some(Err(err)), - }; - - let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 - - let element_type = match ElementType::from(self.doc.data[self.offset]) { - Some(et) => et, - None => { - return Some(Err(RawError::MalformedValue { - message: format!("invalid tag: {}", self.doc.data[self.offset]), - })) - } - }; - - let element_size = match element_type { - ElementType::Double => 8, - ElementType::String => { - let size = - 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; - - if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue { - message: "string not null terminated".into(), - })); - } - - size - } - ElementType::EmbeddedDocument => { - let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; - - if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue { - message: "document not null terminated".into(), - })); - } - - size - } - ElementType::Array => { - let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; - - if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue { - message: "array not null 
terminated".into(), - })); - } - - size - } - ElementType::Binary => { - 5 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize - } - ElementType::Undefined => 0, - ElementType::ObjectId => 12, - ElementType::Boolean => 1, - ElementType::DateTime => 8, - ElementType::Null => 0, - ElementType::RegularExpression => { - let regex = match read_nullterminated(&self.doc.data[valueoffset..]) { - Ok(regex) => regex, - Err(err) => return Some(Err(err)), - }; - - let options = - match read_nullterminated(&self.doc.data[valueoffset + regex.len() + 1..]) { - Ok(options) => options, - Err(err) => return Some(Err(err)), - }; - - regex.len() + options.len() + 2 - } - ElementType::DbPointer => { - let string_size = - 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; - - let id_size = 12; - - if self.doc.data[valueoffset + string_size - 1] != 0 { - return Some(Err(RawError::MalformedValue { - message: "DBPointer string not null-terminated".into(), - })); - } - - string_size + id_size - } - ElementType::JavaScriptCode => { - let size = - 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; - - if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue { - message: "javascript code not null-terminated".into(), - })); - } - - size - } - ElementType::Symbol => { - 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize - } - ElementType::JavaScriptCodeWithScope => { - let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; - - if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue { - message: "javascript with scope not null-terminated".into(), - })); - } - - size - } - ElementType::Int32 => 4, - ElementType::Timestamp => 8, - ElementType::Int64 => 8, - ElementType::Decimal128 => 16, - ElementType::MaxKey => 0, - ElementType::MinKey => 0, - }; - - let nextoffset = valueoffset + element_size; - self.offset = 
nextoffset; - - Some(Ok(( - key, - elem::RawBson::new(element_type, &self.doc.data[valueoffset..nextoffset]), - ))) - } -} - -/// A BSON array referencing raw bytes stored elsewhere. -pub struct RawArray { - doc: RawDocumentRef, -} - -impl RawArray { - fn new(data: &[u8]) -> RawResult<&RawArray> { - Ok(RawArray::from_doc(RawDocumentRef::new(data)?)) - } - - fn from_doc(doc: &RawDocumentRef) -> &RawArray { - // SAFETY: - // - // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is - // null, dangling, or misaligned. We know the pointer is not null or dangling due to the - // fact that it's created by a safe reference. Converting &RawDocumentRef to *const - // RawDocumentRef will be properly aligned due to them being references to the same type, - // and converting *const RawDocumentRef to *const RawArray is aligned due to the fact that - // the only field in a RawArray is a RawDocumentRef, meaning the structs are represented - // identically at the byte level. - unsafe { &*(doc as *const RawDocumentRef as *const RawArray) } - } - - /// Gets a reference to the value at the given index. - pub fn get(&self, index: usize) -> RawResult>> { - self.into_iter().nth(index).transpose() - } - - fn get_with<'a, T>( - &'a self, - index: usize, - f: impl FnOnce(elem::RawBson<'a>) -> RawResult, - ) -> RawResult> { - self.get(index)?.map(f).transpose() - } - - /// Gets the BSON double at the given index or returns an error if the value at that index isn't - /// a double. - pub fn get_f64(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_f64) - } - - /// Gets a reference to the string at the given index or returns an error if the - /// value at that index isn't a string. - pub fn get_str(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_str) - } - - /// Gets a reference to the document at the given index or returns an error if the - /// value at that index isn't a document. 
- pub fn get_document(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_document) - } - - /// Gets a reference to the array at the given index or returns an error if the - /// value at that index isn't a array. - pub fn get_array(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_array) - } - - /// Gets a reference to the BSON binary value at the given index or returns an error if the - /// value at that index isn't a binary. - pub fn get_binary(&self, index: usize) -> RawResult>> { - self.get_with(index, elem::RawBson::as_binary) - } - - /// Gets the ObjectId at the given index or returns an error if the value at that index isn't an - /// ObjectId. - pub fn get_object_id(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_object_id) - } - - /// Gets the boolean at the given index or returns an error if the value at that index isn't a - /// boolean. - pub fn get_bool(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_bool) - } - - /// Gets the DateTime at the given index or returns an error if the value at that index isn't a - /// DateTime. - pub fn get_datetime(&self, index: usize) -> RawResult>> { - self.get_with(index, elem::RawBson::as_datetime) - } - - /// Gets a reference to the BSON regex at the given index or returns an error if the - /// value at that index isn't a regex. - pub fn get_regex(&self, index: usize) -> RawResult>> { - self.get_with(index, elem::RawBson::as_regex) - } - - /// Gets a reference to the BSON timestamp at the given index or returns an error if the - /// value at that index isn't a timestamp. - pub fn get_timestamp(&self, index: usize) -> RawResult>> { - self.get_with(index, elem::RawBson::as_timestamp) - } - - /// Gets the BSON int32 at the given index or returns an error if the value at that index isn't - /// a 32-bit integer. 
- pub fn get_i32(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_i32) - } - - /// Gets BSON int64 at the given index or returns an error if the value at that index isn't a - /// 64-bit integer. - pub fn get_i64(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_i64) - } - - /// Gets a reference to the raw bytes of the RawArray. - pub fn as_bytes(&self) -> &[u8] { - self.doc.as_bytes() - } -} - -impl TryFrom<&RawArray> for Vec { - type Error = RawError; - - fn try_from(arr: &RawArray) -> RawResult> { - arr.into_iter() - .map(|result| { - let rawbson = result?; - Bson::try_from(rawbson) - }) - .collect() - } -} - -impl<'a> IntoIterator for &'a RawArray { - type IntoIter = RawArrayIter<'a>; - type Item = RawResult>; - - fn into_iter(self) -> RawArrayIter<'a> { - RawArrayIter { - inner: self.doc.into_iter(), - } - } -} - -/// An iterator over borrwed raw BSON array values. -pub struct RawArrayIter<'a> { - inner: RawDocumentIter<'a>, -} - -impl<'a> Iterator for RawArrayIter<'a> { - type Item = RawResult>; - - fn next(&mut self) -> Option>> { - match self.inner.next() { - Some(Ok((_, v))) => Some(Ok(v)), - Some(Err(e)) => Some(Err(e)), - None => None, - } - } -} /// Given a 4 byte u8 slice, return an i32 calculated from the bytes in /// little endian order /// @@ -1141,504 +155,29 @@ fn d128_from_slice(val: &[u8]) -> Decimal128 { Decimal128::from(d) } -fn read_nullterminated(buf: &[u8]) -> RawResult<&str> { +fn read_nullterminated(buf: &[u8]) -> Result<&str> { let mut splits = buf.splitn(2, |x| *x == 0); - let value = splits.next().ok_or_else(|| RawError::MalformedValue { + let value = splits.next().ok_or_else(|| Error::MalformedValue { message: "no value".into(), })?; if splits.next().is_some() { Ok(try_to_str(value)?) 
} else { - Err(RawError::MalformedValue { + Err(Error::MalformedValue { message: "expected null terminator".into(), }) } } -fn read_lenencoded(buf: &[u8]) -> RawResult<&str> { +fn read_lenencoded(buf: &[u8]) -> Result<&str> { let length = i32_from_slice(&buf[..4]); assert!(buf.len() as i32 >= length + 4); try_to_str(&buf[4..4 + length as usize - 1]) } -fn try_to_str(data: &[u8]) -> RawResult<&str> { +fn try_to_str(data: &[u8]) -> Result<&str> { match std::str::from_utf8(data) { Ok(s) => Ok(s), - Err(_) => Err(RawError::Utf8EncodingError(data.into())), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - doc, - spec::BinarySubtype, - Binary, - Bson, - JavaScriptCodeWithScope, - Regex, - Timestamp, - }; - use chrono::TimeZone; - - fn to_bytes(doc: &crate::Document) -> Vec { - let mut docbytes = Vec::new(); - doc.to_writer(&mut docbytes).unwrap(); - docbytes - } - - #[test] - fn string_from_document() { - let docbytes = to_bytes(&doc! { - "this": "first", - "that": "second", - "something": "else", - }); - let rawdoc = RawDocumentRef::new(&docbytes).unwrap(); - assert_eq!( - rawdoc.get("that").unwrap().unwrap().as_str().unwrap(), - "second", - ); - } - - #[test] - fn nested_document() { - let docbytes = to_bytes(&doc! { - "outer": { - "inner": "surprise", - }, - }); - let rawdoc = RawDocumentRef::new(&docbytes).unwrap(); - assert_eq!( - rawdoc - .get("outer") - .expect("get doc result") - .expect("get doc option") - .as_document() - .expect("as doc") - .get("inner") - .expect("get str result") - .expect("get str option") - .as_str() - .expect("as str"), - "surprise", - ); - } - - #[test] - fn iterate() { - let docbytes = to_bytes(&doc! 
{ - "apples": "oranges", - "peanut butter": "chocolate", - "easy as": {"do": 1, "re": 2, "mi": 3}, - }); - let rawdoc = RawDocumentRef::new(&docbytes).expect("malformed bson document"); - let mut dociter = rawdoc.into_iter(); - let next = dociter.next().expect("no result").expect("invalid bson"); - assert_eq!(next.0, "apples"); - assert_eq!(next.1.as_str().expect("result was not a str"), "oranges"); - let next = dociter.next().expect("no result").expect("invalid bson"); - assert_eq!(next.0, "peanut butter"); - assert_eq!(next.1.as_str().expect("result was not a str"), "chocolate"); - let next = dociter.next().expect("no result").expect("invalid bson"); - assert_eq!(next.0, "easy as"); - let _doc = next.1.as_document().expect("result was a not a document"); - let next = dociter.next(); - assert!(next.is_none()); - } - - #[test] - fn rawdoc_to_doc() { - let docbytes = to_bytes(&doc! { - "f64": 2.5, - "string": "hello", - "document": {}, - "array": ["binary", "serialized", "object", "notation"], - "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, - "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), - "boolean": true, - "datetime": Utc::now(), - "null": Bson::Null, - "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), - "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), - "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), - "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), - "int32": 23i32, - "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 0 }), - "int64": 46i64, - "end": "END", - }); - - let rawdoc = RawDocumentRef::new(&docbytes).expect("invalid document"); - let _doc: crate::Document = rawdoc.try_into().expect("invalid bson"); - } - - #[test] - fn f64() { - #![allow(clippy::float_cmp)] - - let rawdoc = 
RawDocument::from_document(&doc! {"f64": 2.5}); - assert_eq!( - rawdoc - .get("f64") - .expect("error finding key f64") - .expect("no key f64") - .as_f64() - .expect("result was not a f64"), - 2.5, - ); - } - - #[test] - fn string() { - let rawdoc = RawDocument::from_document(&doc! {"string": "hello"}); - - assert_eq!( - rawdoc - .get("string") - .expect("error finding key string") - .expect("no key string") - .as_str() - .expect("result was not a string"), - "hello", - ); - } - #[test] - fn document() { - let rawdoc = RawDocument::from_document(&doc! {"document": {}}); - - let doc = rawdoc - .get("document") - .expect("error finding key document") - .expect("no key document") - .as_document() - .expect("result was not a document"); - assert_eq!(&doc.data, [5, 0, 0, 0, 0].as_ref()); // Empty document - } - - #[test] - fn array() { - let rawdoc = RawDocument::from_document( - &doc! { "array": ["binary", "serialized", "object", "notation"]}, - ); - - let array = rawdoc - .get("array") - .expect("error finding key array") - .expect("no key array") - .as_array() - .expect("result was not an array"); - assert_eq!(array.get_str(0), Ok(Some("binary"))); - assert_eq!(array.get_str(3), Ok(Some("notation"))); - assert_eq!(array.get_str(4), Ok(None)); - } - - #[test] - fn binary() { - let rawdoc = RawDocument::from_document(&doc! { - "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] } - }); - let binary: elem::RawBinary<'_> = rawdoc - .get("binary") - .expect("error finding key binary") - .expect("no key binary") - .as_binary() - .expect("result was not a binary object"); - assert_eq!(binary.subtype, BinarySubtype::Generic); - assert_eq!(binary.data, &[1, 2, 3]); - } - - #[test] - fn object_id() { - let rawdoc = RawDocument::from_document(&doc! 
{ - "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), - }); - let oid = rawdoc - .get("object_id") - .expect("error finding key object_id") - .expect("no key object_id") - .as_object_id() - .expect("result was not an object id"); - assert_eq!(oid.to_hex(), "0102030405060708090a0b0c"); - } - - #[test] - fn boolean() { - let rawdoc = RawDocument::from_document(&doc! { - "boolean": true, - }); - - let boolean = rawdoc - .get("boolean") - .expect("error finding key boolean") - .expect("no key boolean") - .as_bool() - .expect("result was not boolean"); - - assert_eq!(boolean, true); - } - - #[test] - fn datetime() { - let rawdoc = RawDocument::from_document(&doc! { - "boolean": true, - "datetime": Utc.ymd(2000,10,31).and_hms(12, 30, 45), - }); - let datetime = rawdoc - .get("datetime") - .expect("error finding key datetime") - .expect("no key datetime") - .as_datetime() - .expect("result was not datetime"); - assert_eq!(datetime.to_rfc3339(), "2000-10-31T12:30:45+00:00"); - } - - #[test] - fn null() { - let rawdoc = RawDocument::from_document(&doc! { - "null": null, - }); - let () = rawdoc - .get("null") - .expect("error finding key null") - .expect("no key null") - .as_null() - .expect("was not null"); - } - - #[test] - fn regex() { - let rawdoc = RawDocument::from_document(&doc! { - "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), - }); - let regex = rawdoc - .get("regex") - .expect("error finding key regex") - .expect("no key regex") - .as_regex() - .expect("was not regex"); - assert_eq!(regex.pattern, r"end\s*$"); - assert_eq!(regex.options, "i"); - } - #[test] - fn javascript() { - let rawdoc = RawDocument::from_document(&doc! 
{ - "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), - }); - let js = rawdoc - .get("javascript") - .expect("error finding key javascript") - .expect("no key javascript") - .as_javascript() - .expect("was not javascript"); - assert_eq!(js, "console.log(console);"); - } - - #[test] - fn symbol() { - let rawdoc = RawDocument::from_document(&doc! { - "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), - }); - - let symbol = rawdoc - .get("symbol") - .expect("error finding key symbol") - .expect("no key symbol") - .as_symbol() - .expect("was not symbol"); - assert_eq!(symbol, "artist-formerly-known-as"); - } - - #[test] - fn javascript_with_scope() { - let rawdoc = RawDocument::from_document(&doc! { - "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), - }); - let (js, scopedoc) = rawdoc - .get("javascript_with_scope") - .expect("error finding key javascript_with_scope") - .expect("no key javascript_with_scope") - .as_javascript_with_scope() - .expect("was not javascript with scope"); - assert_eq!(js, "console.log(msg);"); - let (scope_key, scope_value_bson) = scopedoc - .into_iter() - .next() - .expect("no next value in scope") - .expect("invalid element"); - assert_eq!(scope_key, "ok"); - let scope_value = scope_value_bson.as_bool().expect("not a boolean"); - assert_eq!(scope_value, true); - } - - #[test] - fn int32() { - let rawdoc = RawDocument::from_document(&doc! { - "int32": 23i32, - }); - let int32 = rawdoc - .get("int32") - .expect("error finding key int32") - .expect("no key int32") - .as_i32() - .expect("was not int32"); - assert_eq!(int32, 23i32); - } - - #[test] - fn timestamp() { - let rawdoc = RawDocument::from_document(&doc! 
{ - "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 7 }), - }); - let ts = rawdoc - .get("timestamp") - .expect("error finding key timestamp") - .expect("no key timestamp") - .as_timestamp() - .expect("was not a timestamp"); - - assert_eq!(ts.increment(), 7); - assert_eq!(ts.time(), 3542578); - } - - #[test] - fn int64() { - let rawdoc = RawDocument::from_document(&doc! { - "int64": 46i64, - }); - let int64 = rawdoc - .get("int64") - .expect("error finding key int64") - .expect("no key int64") - .as_i64() - .expect("was not int64"); - assert_eq!(int64, 46i64); - } - #[test] - fn document_iteration() { - let docbytes = to_bytes(&doc! { - "f64": 2.5, - "string": "hello", - "document": {}, - "array": ["binary", "serialized", "object", "notation"], - "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, - "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), - "boolean": true, - "datetime": Utc::now(), - "null": Bson::Null, - "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), - "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), - "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), - "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), - "int32": 23i32, - "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 0 }), - "int64": 46i64, - "end": "END", - }); - let rawdoc = unsafe { RawDocumentRef::new_unchecked(&docbytes) }; - - assert_eq!( - rawdoc - .into_iter() - .collect::, RawError>>() - .expect("collecting iterated doc") - .len(), - 17 - ); - let end = rawdoc - .get("end") - .expect("error finding key end") - .expect("no key end") - .as_str() - .expect("was not str"); - assert_eq!(end, "END"); - } - - #[test] - fn into_bson_conversion() { - let docbytes = to_bytes(&doc! 
{ - "f64": 2.5, - "string": "hello", - "document": {}, - "array": ["binary", "serialized", "object", "notation"], - "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), - "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, - "boolean": false, - }); - let rawbson = elem::RawBson::new(ElementType::EmbeddedDocument, &docbytes); - let b: Bson = rawbson.try_into().expect("invalid bson"); - let doc = b.as_document().expect("not a document"); - assert_eq!(*doc.get("f64").expect("f64 not found"), Bson::Double(2.5)); - assert_eq!( - *doc.get("string").expect("string not found"), - Bson::String(String::from("hello")) - ); - assert_eq!( - *doc.get("document").expect("document not found"), - Bson::Document(doc! {}) - ); - assert_eq!( - *doc.get("array").expect("array not found"), - Bson::Array( - vec!["binary", "serialized", "object", "notation"] - .into_iter() - .map(|s| Bson::String(String::from(s))) - .collect() - ) - ); - assert_eq!( - *doc.get("object_id").expect("object_id not found"), - Bson::ObjectId(ObjectId::with_bytes([ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 - ])) - ); - assert_eq!( - *doc.get("binary").expect("binary not found"), - Bson::Binary(Binary { - subtype: BinarySubtype::Generic, - bytes: vec![1, 2, 3] - }) - ); - assert_eq!( - *doc.get("boolean").expect("boolean not found"), - Bson::Boolean(false) - ); - } -} - -#[cfg(test)] -mod proptests { - use proptest::prelude::*; - use std::convert::TryInto; - - use super::{props::arbitrary_bson, RawDocument}; - use crate::doc; - - fn to_bytes(doc: &crate::Document) -> Vec { - let mut docbytes = Vec::new(); - doc.to_writer(&mut docbytes).unwrap(); - docbytes - } - - proptest! 
{ - #[test] - fn no_crashes(s: Vec) { - let _ = RawDocument::new(s); - } - - #[test] - fn roundtrip_bson(bson in arbitrary_bson()) { - println!("{:?}", bson); - let doc = doc!{"bson": bson}; - let raw = to_bytes(&doc); - let raw = RawDocument::new(raw); - prop_assert!(raw.is_ok()); - let raw = raw.unwrap(); - let roundtrip: Result = raw.try_into(); - prop_assert!(roundtrip.is_ok()); - let roundtrip = roundtrip.unwrap(); - prop_assert_eq!(doc, roundtrip); - } + Err(_) => Err(Error::Utf8EncodingError(data.into())), } } diff --git a/src/raw/test.rs b/src/raw/test.rs new file mode 100644 index 00000000..9a801198 --- /dev/null +++ b/src/raw/test.rs @@ -0,0 +1,460 @@ +use super::*; +use crate::{doc, spec::BinarySubtype, Binary, Bson, JavaScriptCodeWithScope, Regex, Timestamp}; +use chrono::TimeZone; + +fn to_bytes(doc: &crate::Document) -> Vec { + let mut docbytes = Vec::new(); + doc.to_writer(&mut docbytes).unwrap(); + docbytes +} + +#[test] +fn string_from_document() { + let docbytes = to_bytes(&doc! { + "this": "first", + "that": "second", + "something": "else", + }); + let rawdoc = RawDocumentRef::new(&docbytes).unwrap(); + assert_eq!( + rawdoc.get("that").unwrap().unwrap().as_str().unwrap(), + "second", + ); +} + +#[test] +fn nested_document() { + let docbytes = to_bytes(&doc! { + "outer": { + "inner": "surprise", + }, + }); + let rawdoc = RawDocumentRef::new(&docbytes).unwrap(); + assert_eq!( + rawdoc + .get("outer") + .expect("get doc result") + .expect("get doc option") + .as_document() + .expect("as doc") + .get("inner") + .expect("get str result") + .expect("get str option") + .as_str() + .expect("as str"), + "surprise", + ); +} + +#[test] +fn iterate() { + let docbytes = to_bytes(&doc! 
{ + "apples": "oranges", + "peanut butter": "chocolate", + "easy as": {"do": 1, "re": 2, "mi": 3}, + }); + let rawdoc = RawDocumentRef::new(&docbytes).expect("malformed bson document"); + let mut dociter = rawdoc.into_iter(); + let next = dociter.next().expect("no result").expect("invalid bson"); + assert_eq!(next.0, "apples"); + assert_eq!(next.1.as_str().expect("result was not a str"), "oranges"); + let next = dociter.next().expect("no result").expect("invalid bson"); + assert_eq!(next.0, "peanut butter"); + assert_eq!(next.1.as_str().expect("result was not a str"), "chocolate"); + let next = dociter.next().expect("no result").expect("invalid bson"); + assert_eq!(next.0, "easy as"); + let _doc = next.1.as_document().expect("result was a not a document"); + let next = dociter.next(); + assert!(next.is_none()); +} + +#[test] +fn rawdoc_to_doc() { + let docbytes = to_bytes(&doc! { + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, + "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), + "boolean": true, + "datetime": Utc::now(), + "null": Bson::Null, + "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), + "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), + "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), + "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), + "int32": 23i32, + "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 0 }), + "int64": 46i64, + "end": "END", + }); + + let rawdoc = RawDocumentRef::new(&docbytes).expect("invalid document"); + let _doc: crate::Document = rawdoc.try_into().expect("invalid bson"); +} + +#[test] +fn f64() { + #![allow(clippy::float_cmp)] + + let rawdoc = 
RawDocument::from_document(&doc! {"f64": 2.5}); + assert_eq!( + rawdoc + .get("f64") + .expect("error finding key f64") + .expect("no key f64") + .as_f64() + .expect("result was not a f64"), + 2.5, + ); +} + +#[test] +fn string() { + let rawdoc = RawDocument::from_document(&doc! {"string": "hello"}); + + assert_eq!( + rawdoc + .get("string") + .expect("error finding key string") + .expect("no key string") + .as_str() + .expect("result was not a string"), + "hello", + ); +} +#[test] +fn document() { + let rawdoc = RawDocument::from_document(&doc! {"document": {}}); + + let doc = rawdoc + .get("document") + .expect("error finding key document") + .expect("no key document") + .as_document() + .expect("result was not a document"); + assert_eq!(&doc.data, [5, 0, 0, 0, 0].as_ref()); // Empty document +} + +#[test] +fn array() { + let rawdoc = RawDocument::from_document( + &doc! { "array": ["binary", "serialized", "object", "notation"]}, + ); + + let array = rawdoc + .get("array") + .expect("error finding key array") + .expect("no key array") + .as_array() + .expect("result was not an array"); + assert_eq!(array.get_str(0), Ok(Some("binary"))); + assert_eq!(array.get_str(3), Ok(Some("notation"))); + assert_eq!(array.get_str(4), Ok(None)); +} + +#[test] +fn binary() { + let rawdoc = RawDocument::from_document(&doc! { + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] } + }); + let binary: elem::RawBinary<'_> = rawdoc + .get("binary") + .expect("error finding key binary") + .expect("no key binary") + .as_binary() + .expect("result was not a binary object"); + assert_eq!(binary.subtype, BinarySubtype::Generic); + assert_eq!(binary.data, &[1, 2, 3]); +} + +#[test] +fn object_id() { + let rawdoc = RawDocument::from_document(&doc! 
{ + "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + }); + let oid = rawdoc + .get("object_id") + .expect("error finding key object_id") + .expect("no key object_id") + .as_object_id() + .expect("result was not an object id"); + assert_eq!(oid.to_hex(), "0102030405060708090a0b0c"); +} + +#[test] +fn boolean() { + let rawdoc = RawDocument::from_document(&doc! { + "boolean": true, + }); + + let boolean = rawdoc + .get("boolean") + .expect("error finding key boolean") + .expect("no key boolean") + .as_bool() + .expect("result was not boolean"); + + assert_eq!(boolean, true); +} + +#[test] +fn datetime() { + let rawdoc = RawDocument::from_document(&doc! { + "boolean": true, + "datetime": Utc.ymd(2000,10,31).and_hms(12, 30, 45), + }); + let datetime = rawdoc + .get("datetime") + .expect("error finding key datetime") + .expect("no key datetime") + .as_datetime() + .expect("result was not datetime"); + assert_eq!(datetime.to_rfc3339(), "2000-10-31T12:30:45+00:00"); +} + +#[test] +fn null() { + let rawdoc = RawDocument::from_document(&doc! { + "null": null, + }); + let () = rawdoc + .get("null") + .expect("error finding key null") + .expect("no key null") + .as_null() + .expect("was not null"); +} + +#[test] +fn regex() { + let rawdoc = RawDocument::from_document(&doc! { + "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), + }); + let regex = rawdoc + .get("regex") + .expect("error finding key regex") + .expect("no key regex") + .as_regex() + .expect("was not regex"); + assert_eq!(regex.pattern, r"end\s*$"); + assert_eq!(regex.options, "i"); +} +#[test] +fn javascript() { + let rawdoc = RawDocument::from_document(&doc! 
{ + "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), + }); + let js = rawdoc + .get("javascript") + .expect("error finding key javascript") + .expect("no key javascript") + .as_javascript() + .expect("was not javascript"); + assert_eq!(js, "console.log(console);"); +} + +#[test] +fn symbol() { + let rawdoc = RawDocument::from_document(&doc! { + "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), + }); + + let symbol = rawdoc + .get("symbol") + .expect("error finding key symbol") + .expect("no key symbol") + .as_symbol() + .expect("was not symbol"); + assert_eq!(symbol, "artist-formerly-known-as"); +} + +#[test] +fn javascript_with_scope() { + let rawdoc = RawDocument::from_document(&doc! { + "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), + }); + let (js, scopedoc) = rawdoc + .get("javascript_with_scope") + .expect("error finding key javascript_with_scope") + .expect("no key javascript_with_scope") + .as_javascript_with_scope() + .expect("was not javascript with scope"); + assert_eq!(js, "console.log(msg);"); + let (scope_key, scope_value_bson) = scopedoc + .into_iter() + .next() + .expect("no next value in scope") + .expect("invalid element"); + assert_eq!(scope_key, "ok"); + let scope_value = scope_value_bson.as_bool().expect("not a boolean"); + assert_eq!(scope_value, true); +} + +#[test] +fn int32() { + let rawdoc = RawDocument::from_document(&doc! { + "int32": 23i32, + }); + let int32 = rawdoc + .get("int32") + .expect("error finding key int32") + .expect("no key int32") + .as_i32() + .expect("was not int32"); + assert_eq!(int32, 23i32); +} + +#[test] +fn timestamp() { + let rawdoc = RawDocument::from_document(&doc! 
{ + "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 7 }), + }); + let ts = rawdoc + .get("timestamp") + .expect("error finding key timestamp") + .expect("no key timestamp") + .as_timestamp() + .expect("was not a timestamp"); + + assert_eq!(ts.increment(), 7); + assert_eq!(ts.time(), 3542578); +} + +#[test] +fn int64() { + let rawdoc = RawDocument::from_document(&doc! { + "int64": 46i64, + }); + let int64 = rawdoc + .get("int64") + .expect("error finding key int64") + .expect("no key int64") + .as_i64() + .expect("was not int64"); + assert_eq!(int64, 46i64); +} +#[test] +fn document_iteration() { + let docbytes = to_bytes(&doc! { + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, + "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), + "boolean": true, + "datetime": Utc::now(), + "null": Bson::Null, + "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), + "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), + "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), + "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), + "int32": 23i32, + "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 0 }), + "int64": 46i64, + "end": "END", + }); + let rawdoc = unsafe { RawDocumentRef::new_unchecked(&docbytes) }; + + assert_eq!( + rawdoc + .into_iter() + .collect::, Error>>() + .expect("collecting iterated doc") + .len(), + 17 + ); + let end = rawdoc + .get("end") + .expect("error finding key end") + .expect("no key end") + .as_str() + .expect("was not str"); + assert_eq!(end, "END"); +} + +#[test] +fn into_bson_conversion() { + let docbytes = to_bytes(&doc! 
{ + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, + "boolean": false, + }); + let rawbson = elem::RawBson::new(ElementType::EmbeddedDocument, &docbytes); + let b: Bson = rawbson.try_into().expect("invalid bson"); + let doc = b.as_document().expect("not a document"); + assert_eq!(*doc.get("f64").expect("f64 not found"), Bson::Double(2.5)); + assert_eq!( + *doc.get("string").expect("string not found"), + Bson::String(String::from("hello")) + ); + assert_eq!( + *doc.get("document").expect("document not found"), + Bson::Document(doc! {}) + ); + assert_eq!( + *doc.get("array").expect("array not found"), + Bson::Array( + vec!["binary", "serialized", "object", "notation"] + .into_iter() + .map(|s| Bson::String(String::from(s))) + .collect() + ) + ); + assert_eq!( + *doc.get("object_id").expect("object_id not found"), + Bson::ObjectId(ObjectId::with_bytes([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 + ])) + ); + assert_eq!( + *doc.get("binary").expect("binary not found"), + Bson::Binary(Binary { + subtype: BinarySubtype::Generic, + bytes: vec![1, 2, 3] + }) + ); + assert_eq!( + *doc.get("boolean").expect("boolean not found"), + Bson::Boolean(false) + ); +} + +use proptest::prelude::*; +use std::convert::TryInto; + +use super::{props::arbitrary_bson, RawDocument}; +use crate::doc; + +fn to_bytes(doc: &crate::Document) -> Vec { + let mut docbytes = Vec::new(); + doc.to_writer(&mut docbytes).unwrap(); + docbytes +} + +proptest! 
{ + #[test] + fn no_crashes(s: Vec) { + let _ = RawDocument::new(s); + } + + #[test] + fn roundtrip_bson(bson in arbitrary_bson()) { + println!("{:?}", bson); + let doc = doc!{"bson": bson}; + let raw = to_bytes(&doc); + let raw = RawDocument::new(raw); + prop_assert!(raw.is_ok()); + let raw = raw.unwrap(); + let roundtrip: Result = raw.try_into(); + prop_assert!(roundtrip.is_ok()); + let roundtrip = roundtrip.unwrap(); + prop_assert_eq!(doc, roundtrip); + } +} From 0658a8180472c279653d3a59505062b6575aa14d Mon Sep 17 00:00:00 2001 From: Saghm Rossi Date: Tue, 16 Feb 2021 14:46:40 -0500 Subject: [PATCH 10/48] refactor top-level module into submodules --- src/raw/array.rs | 78 ++++++++++++++---------- src/raw/doc.rs | 152 +++++++++++++++++++++++++---------------------- src/raw/elem.rs | 38 ++++++------ src/raw/error.rs | 2 +- src/raw/mod.rs | 2 + 5 files changed, 149 insertions(+), 123 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index f814fac2..b58db623 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -1,10 +1,24 @@ +use std::convert::TryFrom; + +use super::{ + Error, + RawBinary, + RawBson, + RawDocumentIter, + RawDocumentRef, + RawRegex, + RawTimestamp, + Result, +}; +use crate::{oid::ObjectId, Bson, DateTime}; + /// A BSON array referencing raw bytes stored elsewhere. pub struct RawArray { doc: RawDocumentRef, } impl RawArray { - fn new(data: &[u8]) -> RawResult<&RawArray> { + pub(super) fn new(data: &[u8]) -> Result<&RawArray> { Ok(RawArray::from_doc(RawDocumentRef::new(data)?)) } @@ -22,88 +36,88 @@ impl RawArray { } /// Gets a reference to the value at the given index. 
- pub fn get(&self, index: usize) -> RawResult>> { + pub fn get(&self, index: usize) -> Result>> { self.into_iter().nth(index).transpose() } fn get_with<'a, T>( &'a self, index: usize, - f: impl FnOnce(elem::RawBson<'a>) -> RawResult, - ) -> RawResult> { + f: impl FnOnce(RawBson<'a>) -> Result, + ) -> Result> { self.get(index)?.map(f).transpose() } /// Gets the BSON double at the given index or returns an error if the value at that index isn't /// a double. - pub fn get_f64(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_f64) + pub fn get_f64(&self, index: usize) -> Result> { + self.get_with(index, RawBson::as_f64) } /// Gets a reference to the string at the given index or returns an error if the /// value at that index isn't a string. - pub fn get_str(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_str) + pub fn get_str(&self, index: usize) -> Result> { + self.get_with(index, RawBson::as_str) } /// Gets a reference to the document at the given index or returns an error if the /// value at that index isn't a document. - pub fn get_document(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_document) + pub fn get_document(&self, index: usize) -> Result> { + self.get_with(index, RawBson::as_document) } /// Gets a reference to the array at the given index or returns an error if the /// value at that index isn't a array. - pub fn get_array(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_array) + pub fn get_array(&self, index: usize) -> Result> { + self.get_with(index, RawBson::as_array) } /// Gets a reference to the BSON binary value at the given index or returns an error if the /// value at that index isn't a binary. 
- pub fn get_binary(&self, index: usize) -> RawResult>> { - self.get_with(index, elem::RawBson::as_binary) + pub fn get_binary(&self, index: usize) -> Result>> { + self.get_with(index, RawBson::as_binary) } /// Gets the ObjectId at the given index or returns an error if the value at that index isn't an /// ObjectId. - pub fn get_object_id(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_object_id) + pub fn get_object_id(&self, index: usize) -> Result> { + self.get_with(index, RawBson::as_object_id) } /// Gets the boolean at the given index or returns an error if the value at that index isn't a /// boolean. - pub fn get_bool(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_bool) + pub fn get_bool(&self, index: usize) -> Result> { + self.get_with(index, RawBson::as_bool) } /// Gets the DateTime at the given index or returns an error if the value at that index isn't a /// DateTime. - pub fn get_datetime(&self, index: usize) -> RawResult>> { - self.get_with(index, elem::RawBson::as_datetime) + pub fn get_datetime(&self, index: usize) -> Result> { + Ok(self.get_with(index, RawBson::as_datetime)?.map(Into::into)) } /// Gets a reference to the BSON regex at the given index or returns an error if the /// value at that index isn't a regex. - pub fn get_regex(&self, index: usize) -> RawResult>> { - self.get_with(index, elem::RawBson::as_regex) + pub fn get_regex(&self, index: usize) -> Result>> { + self.get_with(index, RawBson::as_regex) } /// Gets a reference to the BSON timestamp at the given index or returns an error if the /// value at that index isn't a timestamp. - pub fn get_timestamp(&self, index: usize) -> RawResult>> { - self.get_with(index, elem::RawBson::as_timestamp) + pub fn get_timestamp(&self, index: usize) -> Result>> { + self.get_with(index, RawBson::as_timestamp) } /// Gets the BSON int32 at the given index or returns an error if the value at that index isn't /// a 32-bit integer. 
- pub fn get_i32(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_i32) + pub fn get_i32(&self, index: usize) -> Result> { + self.get_with(index, RawBson::as_i32) } /// Gets BSON int64 at the given index or returns an error if the value at that index isn't a /// 64-bit integer. - pub fn get_i64(&self, index: usize) -> RawResult> { - self.get_with(index, elem::RawBson::as_i64) + pub fn get_i64(&self, index: usize) -> Result> { + self.get_with(index, RawBson::as_i64) } /// Gets a reference to the raw bytes of the RawArray. @@ -115,7 +129,7 @@ impl RawArray { impl TryFrom<&RawArray> for Vec { type Error = Error; - fn try_from(arr: &RawArray) -> RawResult> { + fn try_from(arr: &RawArray) -> Result> { arr.into_iter() .map(|result| { let rawbson = result?; @@ -127,7 +141,7 @@ impl TryFrom<&RawArray> for Vec { impl<'a> IntoIterator for &'a RawArray { type IntoIter = RawArrayIter<'a>; - type Item = RawResult>; + type Item = Result>; fn into_iter(self) -> RawArrayIter<'a> { RawArrayIter { @@ -142,9 +156,9 @@ pub struct RawArrayIter<'a> { } impl<'a> Iterator for RawArrayIter<'a> { - type Item = RawResult>; + type Item = Result>; - fn next(&mut self) -> Option>> { + fn next(&mut self) -> Option>> { match self.inner.next() { Some(Ok((_, v))) => Some(Ok(v)), Some(Err(e)) => Some(Err(e)), diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 4565d541..f948e30c 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -6,10 +6,20 @@ use std::{ use chrono::{DateTime, Utc}; -use super::{i32_from_slice, Error, RawArray, RawBinary, RawBson, RawRegex, RawTimestamp, Result}; +use super::{ + i32_from_slice, + read_nullterminated, + Error, + RawArray, + RawBinary, + RawBson, + RawRegex, + RawTimestamp, + Result, +}; #[cfg(feature = "decimal128")] use crate::decimal128::Decimal128; -use crate::{oid::ObjectId, spec::ElementType, Bson, Document}; +use crate::{oid::ObjectId, spec::ElementType, Document}; /// A BSON document, stored as raw bytes on the heap. 
This can be created from a `Vec` or /// a [`bson::Document`]. @@ -22,14 +32,14 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, Document}; /// original document without making any additional allocations. /// /// ``` -/// # use bson::raw::{RawDocument, RawError}; +/// # use bson::raw::{RawDocument, Error}; /// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; /// let mut iter = doc.iter(); /// let (key, value) = iter.next().unwrap()?; /// assert_eq!(key, "hi"); /// assert_eq!(value.as_str(), Ok("y'all")); /// assert!(iter.next().is_none()); -/// # Ok::<(), RawError>(()) +/// # Ok::<(), Error>(()) /// ``` /// /// Individual elements can be accessed using [`RawDocument::get`](RawDocument::get) or any of the @@ -39,10 +49,10 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, Document}; /// requested key. /// /// ``` -/// # use bson::raw::{RawDocument, RawError}; +/// # use bson::raw::{RawDocument, Error}; /// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; /// assert_eq!(doc.get_str("hi")?, Some("y'all")); -/// # Ok::<(), RawError>(()) +/// # Ok::<(), Error>(()) /// ``` #[derive(Clone, Debug)] pub struct RawDocument { @@ -60,12 +70,12 @@ impl RawDocument { /// Note that the internal structure of the bytes representing the /// BSON elements is _not_ validated at all by this method. If the /// bytes do not conform to the BSON spec, then method calls on - /// the RawDocument will return RawErrors where appropriate. + /// the RawDocument will return Errors where appropriate. /// /// ``` - /// # use bson::raw::{RawDocument, RawError}; + /// # use bson::raw::{RawDocument, Error}; /// let doc = RawDocument::new(b"\x05\0\0\0\0".to_vec())?; - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn new(data: Vec) -> Result { if data.len() < 5 { @@ -96,7 +106,7 @@ impl RawDocument { /// Create a RawDocument from a Document. 
/// /// ``` - /// # use bson::raw::{RawDocument, RawError}; + /// # use bson::raw::{RawDocument, Error}; /// use bson::{doc, oid::ObjectId}; /// /// let document = doc! { @@ -105,7 +115,7 @@ impl RawDocument { /// "title": "Moby-Dick", /// }; /// let doc = RawDocument::from_document(&document); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn from_document(doc: &Document) -> RawDocument { let mut data = Vec::new(); @@ -120,7 +130,7 @@ impl RawDocument { /// Element<'_>>`. /// /// ``` - /// # use bson::raw::{elem, RawDocument, RawError}; + /// # use bson::raw::{elem, RawDocument, Error}; /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc! { "ferris": true }); @@ -130,7 +140,7 @@ impl RawDocument { /// assert_eq!(key, "ferris"); /// assert_eq!(value.as_bool()?, true); /// } - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` /// /// # Note: @@ -208,14 +218,14 @@ impl ToOwned for RawDocumentRef { /// original document without making any additional allocations. /// ``` -/// # use bson::raw::{Doc, RawError}; +/// # use bson::raw::{Doc, Error}; /// let doc = RawDocumentRef::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; /// let mut iter = doc.into_iter(); /// let (key, value) = iter.next().unwrap()?; /// assert_eq!(key, "hi"); /// assert_eq!(value.as_str(), Ok("y'all")); /// assert!(iter.next().is_none()); -/// # Ok::<(), RawError>(()) +/// # Ok::<(), Error>(()) /// ``` /// /// Individual elements can be accessed using [`RawDocumentRef::get`](RawDocumentRef::get) or any of @@ -225,10 +235,10 @@ impl ToOwned for RawDocumentRef { /// requires iterating through the document from the beginning to find the requested key. 
/// /// ``` -/// # use bson::raw::{RawDocument, RawError}; +/// # use bson::raw::{RawDocument, Error}; /// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; /// assert_eq!(doc.get_str("hi")?, Some("y'all")); -/// # Ok::<(), RawError>(()) +/// # Ok::<(), Error>(()) /// ``` #[derive(Debug)] pub struct RawDocumentRef { @@ -246,12 +256,12 @@ impl RawDocumentRef { /// Note that the internal structure of the bytes representing the /// BSON elements is _not_ validated at all by this method. If the /// bytes do not conform to the BSON spec, then method calls on - /// the RawDocument will return RawErrors where appropriate. + /// the RawDocument will return Errors where appropriate. /// /// ``` - /// # use bson::raw::{RawDocument, RawError}; + /// # use bson::raw::{RawDocument, Error}; /// let doc = RawDocumentRef::new(b"\x05\0\0\0\0")?; - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn new + ?Sized>(data: &D) -> Result<&RawDocumentRef> { let data = data.as_ref(); @@ -296,13 +306,13 @@ impl RawDocumentRef { /// Creates a new RawDocument with an owned copy of the BSON bytes. /// /// ``` - /// # use bson::raw::{Doc, RawError}; + /// # use bson::raw::{Doc, Error}; /// use bson::raw::RawDocument; /// /// let data = b"\x05\0\0\0\0"; /// let doc_ref = RawDocumentRef::new(data)?; /// let doc: RawDocument = doc_ref.to_raw_document(); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) pub fn to_raw_document(&self) -> RawDocument { RawDocument { data: self.data.to_owned().into_boxed_slice(), @@ -313,7 +323,7 @@ impl RawDocumentRef { /// found. 
/// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, Error}; /// # /// use bson::{doc, oid::ObjectId}; /// @@ -325,7 +335,7 @@ impl RawDocumentRef { /// let element = doc.get("f64")?.expect("finding key f64"); /// assert_eq!(element.as_f64(), Ok(2.5)); /// assert!(doc.get("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get<'a>(&'a self, key: &str) -> Result>> { for result in self.into_iter() { @@ -349,7 +359,7 @@ impl RawDocumentRef { /// if the key corresponds to a value which isn't a double. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, Error}; /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc! { @@ -358,9 +368,9 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); - /// assert_eq!(doc.get_f64("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_f64("bool"), Err(Error::UnexpectedType)); /// assert_eq!(doc.get_f64("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_f64(&self, key: &str) -> Result> { self.get_with(key, RawBson::as_f64) @@ -370,7 +380,7 @@ impl RawDocumentRef { /// key corresponds to a value which isn't a string. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, Error}; /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc! 
{ @@ -379,9 +389,9 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); - /// assert_eq!(doc.get_str("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_str("bool"), Err(Error::UnexpectedType)); /// assert_eq!(doc.get_str("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_str<'a>(&'a self, key: &str) -> Result> { self.get_with(key, RawBson::as_str) @@ -391,7 +401,7 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a document. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, Error}; /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc! { @@ -400,9 +410,9 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); - /// assert_eq!(doc.get_document("bool").unwrap_err(), RawError::UnexpectedType); + /// assert_eq!(doc.get_document("bool").unwrap_err(), Error::UnexpectedType); /// assert!(doc.get_document("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_document<'a>(&'a self, key: &str) -> Result> { self.get_with(key, RawBson::as_document) @@ -412,7 +422,7 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't an array. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, Error}; /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc! 
{ @@ -427,7 +437,7 @@ impl RawDocumentRef { /// assert!(arr_iter.next().is_none()); /// assert!(doc.get_array("bool").is_err()); /// assert!(doc.get_array("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_array<'a>(&'a self, key: &str) -> Result> { self.get_with(key, RawBson::as_array) @@ -437,7 +447,7 @@ impl RawDocumentRef { /// if the key corresponds to a value which isn't a binary value. /// /// ``` - /// # use bson::raw::{RawDocument, elem, RawError}; + /// # use bson::raw::{RawDocument, elem, Error}; /// /// use bson::{ /// spec::BinarySubtype @@ -450,9 +460,9 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_binary("binary")?.map(elem::RawBsonBinary::as_bytes), Some(&[1, 2, 3][..])); - /// assert_eq!(doc.get_binary("bool").unwrap_err(), RawError::UnexpectedType); + /// assert_eq!(doc.get_binary("bool").unwrap_err(), Error::UnexpectedType); /// assert!(doc.get_binary("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_binary<'a>(&'a self, key: &str) -> Result>> { self.get_with(key, RawBson::as_binary) @@ -462,7 +472,7 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't an ObjectId. /// /// ``` - /// # use bson::raw::{RawDocument, RawError}; + /// # use bson::raw::{RawDocument, Error}; /// use bson::{doc, oid::ObjectId}; /// /// let doc = RawDocument::from_document(&doc! 
{ @@ -471,9 +481,9 @@ impl RawDocumentRef { /// }); /// /// let oid = doc.get_object_id("_id")?.unwrap(); - /// assert_eq!(doc.get_object_id("bool").unwrap_err(), RawError::UnexpectedType); + /// assert_eq!(doc.get_object_id("bool").unwrap_err(), Error::UnexpectedType); /// assert!(doc.get_object_id("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_object_id(&self, key: &str) -> Result> { self.get_with(key, RawBson::as_object_id) @@ -483,7 +493,7 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a boolean. /// /// ``` - /// # use bson::raw::{RawDocument, RawError}; + /// # use bson::raw::{RawDocument, Error}; /// use bson::{doc, oid::ObjectId}; /// /// let doc = RawDocument::from_document(&doc! { @@ -492,9 +502,9 @@ impl RawDocumentRef { /// }); /// /// assert!(doc.get_bool("bool")?.unwrap()); - /// assert_eq!(doc.get_bool("_id").unwrap_err(), RawError::UnexpectedType); + /// assert_eq!(doc.get_bool("_id").unwrap_err(), Error::UnexpectedType); /// assert!(doc.get_object_id("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_bool(&self, key: &str) -> Result> { self.get_with(key, RawBson::as_bool) @@ -504,7 +514,7 @@ impl RawDocumentRef { /// if the key corresponds to a value which isn't a DateTime. 
/// /// ``` - /// # use bson::raw::{RawDocument, RawError}; + /// # use bson::raw::{RawDocument, Error}; /// use bson::doc; /// use chrono::{Utc, Datelike, TimeZone}; /// @@ -513,9 +523,9 @@ impl RawDocumentRef { /// "bool": true, /// }); /// assert_eq!(doc.get_datetime("created_at")?.unwrap().year(), 2020); - /// assert_eq!(doc.get_datetime("bool").unwrap_err(), RawError::UnexpectedType); + /// assert_eq!(doc.get_datetime("bool").unwrap_err(), Error::UnexpectedType); /// assert!(doc.get_datetime("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_datetime(&self, key: &str) -> Result>> { self.get_with(key, RawBson::as_datetime) @@ -524,7 +534,7 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a regex. /// /// ``` - /// # use bson::raw::{RawDocument, RawError, elem}; + /// # use bson::raw::{RawDocument, Error, elem}; /// use bson::{doc, Regex}; /// /// let doc = RawDocument::from_document(&doc! { @@ -537,9 +547,9 @@ impl RawDocumentRef { /// /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); - /// assert_eq!(doc.get_regex("bool").unwrap_err(), RawError::UnexpectedType); + /// assert_eq!(doc.get_regex("bool").unwrap_err(), Error::UnexpectedType); /// assert!(doc.get_regex("unknown")?.is_none()); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_regex<'a>(&'a self, key: &str) -> Result>> { self.get_with(key, RawBson::as_regex) @@ -549,7 +559,7 @@ impl RawDocumentRef { /// error if the key corresponds to a value which isn't a timestamp. /// /// ``` - /// # use bson::raw::{RawDocument, elem, RawError}; + /// # use bson::raw::{RawDocument, elem, Error}; /// use bson::{doc, Timestamp}; /// /// let doc = RawDocument::from_document(&doc! 
{ @@ -561,9 +571,9 @@ impl RawDocumentRef { /// /// assert_eq!(timestamp.time(), 649876543); /// assert_eq!(timestamp.increment(), 9); - /// assert_eq!(doc.get_timestamp("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_timestamp("bool"), Err(Error::UnexpectedType)); /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_timestamp<'a>(&'a self, key: &str) -> Result>> { self.get_with(key, RawBson::as_timestamp) @@ -573,7 +583,7 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a 32-bit integer. /// /// ``` - /// # use bson::raw::{RawDocument, RawError}; + /// # use bson::raw::{RawDocument, Error}; /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc! { @@ -582,9 +592,9 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); - /// assert_eq!(doc.get_i32("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_i32("bool"), Err(Error::UnexpectedType)); /// assert_eq!(doc.get_i32("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_i32(&self, key: &str) -> Result> { self.get_with(key, RawBson::as_i32) @@ -594,7 +604,7 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a 64-bit integer. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, RawError}; + /// # use bson::raw::{RawDocument, elem::Element, Error}; /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc! 
{ @@ -603,9 +613,9 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); - /// assert_eq!(doc.get_i64("bool"), Err(RawError::UnexpectedType)); + /// assert_eq!(doc.get_i64("bool"), Err(Error::UnexpectedType)); /// assert_eq!(doc.get_i64("unknown"), Ok(None)); - /// # Ok::<(), RawError>(()) + /// # Ok::<(), Error>(()) /// ``` pub fn get_i64(&self, key: &str) -> Result> { self.get_with(key, RawBson::as_i64) @@ -651,7 +661,7 @@ impl TryFrom<&RawDocumentRef> for crate::Document { impl<'a> IntoIterator for &'a RawDocumentRef { type IntoIter = RawDocumentIter<'a>; - type Item = RawResult<(&'a str, RawBson<'a>)>; + type Item = Result<(&'a str, RawBson<'a>)>; fn into_iter(self) -> RawDocumentIter<'a> { RawDocumentIter { @@ -667,15 +677,15 @@ pub struct RawDocumentIter<'a> { } impl<'a> Iterator for RawDocumentIter<'a> { - type Item = RawResult<(&'a str, elem::RawBson<'a>)>; + type Item = Result<(&'a str, RawBson<'a>)>; - fn next(&mut self) -> Option)>> { + fn next(&mut self) -> Option)>> { if self.offset == self.doc.data.len() - 1 { if self.doc.data[self.offset] == 0 { // end of document marker return None; } else { - return Some(Err(RawError::MalformedValue { + return Some(Err(Error::MalformedValue { message: "document not null terminated".into(), })); } @@ -691,7 +701,7 @@ impl<'a> Iterator for RawDocumentIter<'a> { let element_type = match ElementType::from(self.doc.data[self.offset]) { Some(et) => et, None => { - return Some(Err(RawError::MalformedValue { + return Some(Err(Error::MalformedValue { message: format!("invalid tag: {}", self.doc.data[self.offset]), })) } @@ -704,7 +714,7 @@ impl<'a> Iterator for RawDocumentIter<'a> { 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue { + return Some(Err(Error::MalformedValue { message: "string not null terminated".into(), })); } @@ -715,7 +725,7 @@ impl<'a> 
Iterator for RawDocumentIter<'a> { let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue { + return Some(Err(Error::MalformedValue { message: "document not null terminated".into(), })); } @@ -726,7 +736,7 @@ impl<'a> Iterator for RawDocumentIter<'a> { let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue { + return Some(Err(Error::MalformedValue { message: "array not null terminated".into(), })); } @@ -762,7 +772,7 @@ impl<'a> Iterator for RawDocumentIter<'a> { let id_size = 12; if self.doc.data[valueoffset + string_size - 1] != 0 { - return Some(Err(RawError::MalformedValue { + return Some(Err(Error::MalformedValue { message: "DBPointer string not null-terminated".into(), })); } @@ -774,7 +784,7 @@ impl<'a> Iterator for RawDocumentIter<'a> { 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue { + return Some(Err(Error::MalformedValue { message: "javascript code not null-terminated".into(), })); } @@ -788,7 +798,7 @@ impl<'a> Iterator for RawDocumentIter<'a> { let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(RawError::MalformedValue { + return Some(Err(Error::MalformedValue { message: "javascript with scope not null-terminated".into(), })); } @@ -808,7 +818,7 @@ impl<'a> Iterator for RawDocumentIter<'a> { Some(Ok(( key, - elem::RawBson::new(element_type, &self.doc.data[valueoffset..nextoffset]), + RawBson::new(element_type, &self.doc.data[valueoffset..nextoffset]), ))) } } diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 549d979e..24bc108d 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -16,7 +16,7 @@ use 
super::{ Error, RawArray, RawDocumentRef, - RawResult, + Result, }; use crate::{ oid::ObjectId, @@ -47,7 +47,7 @@ impl<'a> RawBson<'a> { } /// Gets the f64 that's referenced or returns an error if the value isn't a BSON double. - pub fn as_f64(self) -> RawResult { + pub fn as_f64(self) -> Result { if let ElementType::Double = self.element_type { Ok(f64::from_bits(u64::from_le_bytes( self.data.try_into().map_err(|_| Error::MalformedValue { @@ -60,7 +60,7 @@ impl<'a> RawBson<'a> { } /// Gets the string that's referenced or returns an error if the value isn't a BSON string. - pub fn as_str(self) -> RawResult<&'a str> { + pub fn as_str(self) -> Result<&'a str> { if let ElementType::String = self.element_type { read_lenencoded(self.data) } else { @@ -69,7 +69,7 @@ impl<'a> RawBson<'a> { } /// Gets the document that's referenced or returns an error if the value isn't a BSON document. - pub fn as_document(self) -> RawResult<&'a RawDocumentRef> { + pub fn as_document(self) -> Result<&'a RawDocumentRef> { if let ElementType::EmbeddedDocument = self.element_type { RawDocumentRef::new(self.data) } else { @@ -78,7 +78,7 @@ impl<'a> RawBson<'a> { } /// Gets the array that's referenced or returns an error if the value isn't a BSON array. - pub fn as_array(self) -> RawResult<&'a RawArray> { + pub fn as_array(self) -> Result<&'a RawArray> { if let ElementType::Array = self.element_type { RawArray::new(self.data) } else { @@ -87,7 +87,7 @@ impl<'a> RawBson<'a> { } /// Gets the BSON binary value that's referenced or returns an error if the value a BSON binary. - pub fn as_binary(self) -> RawResult> { + pub fn as_binary(self) -> Result> { if let ElementType::Binary = self.element_type { let length = i32_from_slice(&self.data[0..4]); let subtype = BinarySubtype::from(self.data[4]); // TODO: This mishandles reserved values @@ -120,7 +120,7 @@ impl<'a> RawBson<'a> { } /// Gets the ObjectId that's referenced or returns an error if the value isn't a BSON ObjectId. 
- pub fn as_object_id(self) -> RawResult { + pub fn as_object_id(self) -> Result { if let ElementType::ObjectId = self.element_type { Ok(ObjectId::with_bytes(self.data.try_into().map_err( |_| Error::MalformedValue { @@ -133,7 +133,7 @@ impl<'a> RawBson<'a> { } /// Gets the boolean that's referenced or returns an error if the value isn't a BSON boolean. - pub fn as_bool(self) -> RawResult { + pub fn as_bool(self) -> Result { if let ElementType::Boolean = self.element_type { if self.data.len() != 1 { Err(Error::MalformedValue { @@ -154,7 +154,7 @@ impl<'a> RawBson<'a> { } /// Gets the DateTime that's referenced or returns an error if the value isn't a BSON DateTime. - pub fn as_datetime(self) -> RawResult> { + pub fn as_datetime(self) -> Result> { if let ElementType::DateTime = self.element_type { let millis = i64_from_slice(self.data); if millis >= 0 { @@ -180,7 +180,7 @@ impl<'a> RawBson<'a> { } /// Gets the regex that's referenced or returns an error if the value isn't a BSON regex. - pub fn as_regex(self) -> RawResult> { + pub fn as_regex(self) -> Result> { if let ElementType::RegularExpression = self.element_type { RawRegex::new(self.data) } else { @@ -190,7 +190,7 @@ impl<'a> RawBson<'a> { /// Gets the BSON JavaScript code that's referenced or returns an error if the value isn't BSON /// JavaScript code. - pub fn as_javascript(self) -> RawResult<&'a str> { + pub fn as_javascript(self) -> Result<&'a str> { if let ElementType::JavaScriptCode = self.element_type { read_lenencoded(self.data) } else { @@ -199,7 +199,7 @@ impl<'a> RawBson<'a> { } /// Gets the symbol that's referenced or returns an error if the value isn't a BSON symbol. 
- pub fn as_symbol(self) -> RawResult<&'a str> { + pub fn as_symbol(self) -> Result<&'a str> { if let ElementType::Symbol = self.element_type { read_lenencoded(self.data) } else { @@ -209,7 +209,7 @@ impl<'a> RawBson<'a> { /// Gets the BSON JavaScript code with scope that's referenced or returns an error if the value /// isn't BSON JavaScript code with scope. - pub fn as_javascript_with_scope(self) -> RawResult> { + pub fn as_javascript_with_scope(self) -> Result> { if let ElementType::JavaScriptCodeWithScope = self.element_type { let length = i32_from_slice(&self.data[..4]); assert_eq!(self.data.len() as i32, length); @@ -225,7 +225,7 @@ impl<'a> RawBson<'a> { /// Gets the timestamp that's referenced or returns an error if the value isn't a BSON /// timestamp. - pub fn as_timestamp(self) -> RawResult> { + pub fn as_timestamp(self) -> Result> { if let ElementType::Timestamp = self.element_type { assert_eq!(self.data.len(), 8); Ok(RawTimestamp { data: self.data }) @@ -235,7 +235,7 @@ impl<'a> RawBson<'a> { } /// Gets the i32 that's referenced or returns an error if the value isn't a BSON int32. - pub fn as_i32(self) -> RawResult { + pub fn as_i32(self) -> Result { if let ElementType::Int32 = self.element_type { assert_eq!(self.data.len(), 4); Ok(i32_from_slice(self.data)) @@ -245,7 +245,7 @@ impl<'a> RawBson<'a> { } /// Gets the i64 that's referenced or returns an error if the value isn't a BSON int64. - pub fn as_i64(self) -> RawResult { + pub fn as_i64(self) -> Result { if let ElementType::Int64 = self.element_type { assert_eq!(self.data.len(), 8); Ok(i64_from_slice(self.data)) @@ -256,7 +256,7 @@ impl<'a> RawBson<'a> { /// Gets the decimal that's referenced or returns an error if the value isn't a BSON Decimal128. 
#[cfg(feature = "decimal128")] - pub fn as_decimal128(self) -> RawResult { + pub fn as_decimal128(self) -> Result { if let ElementType::Decimal128 = self.element_type { assert_eq!(self.data.len(), 16); Ok(d128_from_slice(self.data)) @@ -269,7 +269,7 @@ impl<'a> RawBson<'a> { impl<'a> TryFrom> for Bson { type Error = Error; - fn try_from(rawbson: RawBson<'a>) -> RawResult { + fn try_from(rawbson: RawBson<'a>) -> Result { Ok(match rawbson.element_type { ElementType::Double => Bson::Double(rawbson.as_f64()?), ElementType::String => Bson::String(String::from(rawbson.as_str()?)), @@ -367,7 +367,7 @@ pub struct RawRegex<'a> { } impl<'a> RawRegex<'a> { - pub(super) fn new(data: &'a [u8]) -> RawResult> { + pub(super) fn new(data: &'a [u8]) -> Result> { let pattern = read_nullterminated(data)?; let opts = read_nullterminated(&data[pattern.len() + 1..])?; if pattern.len() + opts.len() == data.len() - 2 { diff --git a/src/raw/error.rs b/src/raw/error.rs index 548bee5e..269b3d29 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -24,4 +24,4 @@ impl std::fmt::Display for Error { impl std::error::Error for Error {} -pub type Result = Result; +pub type Result = std::result::Result; diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 5f37dc26..40e63ee8 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -111,6 +111,8 @@ mod props; #[cfg(test)] mod test; +use std::convert::TryInto; + pub use self::{ array::{RawArray, RawArrayIter}, doc::{RawDocument, RawDocumentIter, RawDocumentRef}, From 07b4e5781c698bef62b893922d2a398f8e20598b Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 4 Oct 2021 19:48:04 -0400 Subject: [PATCH 11/48] code compiling, tests passing --- src/raw/doc.rs | 85 ++++++++++++++++++++++++------------------------ src/raw/elem.rs | 40 +++++++++-------------- src/raw/mod.rs | 24 +++++++------- src/raw/props.rs | 13 +++++--- src/raw/test.rs | 65 +++++++++++++++++++----------------- 5 files changed, 113 insertions(+), 114 deletions(-) diff --git a/src/raw/doc.rs 
b/src/raw/doc.rs index f948e30c..f7badee9 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -4,7 +4,7 @@ use std::{ ops::Deref, }; -use chrono::{DateTime, Utc}; +use crate::DateTime; use super::{ i32_from_slice, @@ -130,8 +130,8 @@ impl RawDocument { /// Element<'_>>`. /// /// ``` - /// # use bson::raw::{elem, RawDocument, Error}; - /// use bson::doc; + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocument}; /// /// let doc = RawDocument::from_document(&doc! { "ferris": true }); /// @@ -218,7 +218,9 @@ impl ToOwned for RawDocumentRef { /// original document without making any additional allocations. /// ``` -/// # use bson::raw::{Doc, Error}; +/// # use bson::raw::{Error}; +/// use bson::raw::RawDocumentRef; +/// /// let doc = RawDocumentRef::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; /// let mut iter = doc.into_iter(); /// let (key, value) = iter.next().unwrap()?; @@ -259,9 +261,10 @@ impl RawDocumentRef { /// the RawDocument will return Errors where appropriate. /// /// ``` - /// # use bson::raw::{RawDocument, Error}; + /// use bson::raw::RawDocumentRef; + /// /// let doc = RawDocumentRef::new(b"\x05\0\0\0\0")?; - /// # Ok::<(), Error>(()) + /// # Ok::<(), bson::raw::Error>(()) /// ``` pub fn new + ?Sized>(data: &D) -> Result<&RawDocumentRef> { let data = data.as_ref(); @@ -306,8 +309,7 @@ impl RawDocumentRef { /// Creates a new RawDocument with an owned copy of the BSON bytes. /// /// ``` - /// # use bson::raw::{Doc, Error}; - /// use bson::raw::RawDocument; + /// use bson::raw::{RawDocumentRef, RawDocument, Error}; /// /// let data = b"\x05\0\0\0\0"; /// let doc_ref = RawDocumentRef::new(data)?; @@ -323,9 +325,8 @@ impl RawDocumentRef { /// found. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, Error}; - /// # - /// use bson::{doc, oid::ObjectId}; + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::{RawDocument, RawBson}}; /// /// let doc = RawDocument::from_document(&doc! 
{ /// "_id": ObjectId::new(), @@ -359,7 +360,8 @@ impl RawDocumentRef { /// if the key corresponds to a value which isn't a double. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, Error}; + /// # use bson::raw::Error; + /// use bson::raw::RawDocument; /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc! { @@ -380,8 +382,7 @@ impl RawDocumentRef { /// key corresponds to a value which isn't a string. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, Error}; - /// use bson::doc; + /// use bson::{doc, raw::{RawDocument, Error}}; /// /// let doc = RawDocument::from_document(&doc! { /// "string": "hello", @@ -401,8 +402,8 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a document. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, Error}; - /// use bson::doc; + /// use bson::raw::Error; + /// use bson::{doc, raw::RawDocument}; /// /// let doc = RawDocument::from_document(&doc! { /// "doc": { "key": "value"}, @@ -422,17 +423,17 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't an array. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, Error}; - /// use bson::doc; + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocument}; /// /// let doc = RawDocument::from_document(&doc! { /// "array": [true, 3], /// "bool": true, /// }); /// - /// let mut arr_iter = docbuf.get_array("array")?.expect("finding key array").into_iter(); - /// let _: bool = arriter.next().unwrap()?.as_bool()?; - /// let _: i32 = arriter.next().unwrap()?.as_i32()?; + /// let mut arr_iter = doc.get_array("array")?.expect("finding key array").into_iter(); + /// let _: bool = arr_iter.next().unwrap()?.as_bool()?; + /// let _: i32 = arr_iter.next().unwrap()?.as_i32()?; /// /// assert!(arr_iter.next().is_none()); /// assert!(doc.get_array("bool").is_err()); @@ -447,11 +448,12 @@ impl RawDocumentRef { /// if the key corresponds to a value which isn't a binary value. 
/// /// ``` - /// # use bson::raw::{RawDocument, elem, Error}; - /// + /// # use bson::raw::Error; /// use bson::{ - /// spec::BinarySubtype - /// doc, Binary, + /// doc, + /// raw::{RawDocument, RawBinary}, + /// spec::BinarySubtype, + /// Binary, /// }; /// /// let doc = RawDocument::from_document(&doc! { @@ -459,7 +461,7 @@ impl RawDocumentRef { /// "bool": true, /// }); /// - /// assert_eq!(doc.get_binary("binary")?.map(elem::RawBsonBinary::as_bytes), Some(&[1, 2, 3][..])); + /// assert_eq!(doc.get_binary("binary")?.map(RawBinary::as_bytes), Some(&[1, 2, 3][..])); /// assert_eq!(doc.get_binary("bool").unwrap_err(), Error::UnexpectedType); /// assert!(doc.get_binary("unknown")?.is_none()); /// # Ok::<(), Error>(()) @@ -472,8 +474,8 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't an ObjectId. /// /// ``` - /// # use bson::raw::{RawDocument, Error}; - /// use bson::{doc, oid::ObjectId}; + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::RawDocument}; /// /// let doc = RawDocument::from_document(&doc! { /// "_id": ObjectId::new(), @@ -514,28 +516,28 @@ impl RawDocumentRef { /// if the key corresponds to a value which isn't a DateTime. /// /// ``` - /// # use bson::raw::{RawDocument, Error}; - /// use bson::doc; - /// use chrono::{Utc, Datelike, TimeZone}; + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocument, DateTime}; /// + /// let dt = DateTime::now(); /// let doc = RawDocument::from_document(&doc! 
{ - /// "created_at": Utc.ymd(2020, 3, 15).and_hms(17, 0, 0), + /// "created_at": dt, /// "bool": true, /// }); - /// assert_eq!(doc.get_datetime("created_at")?.unwrap().year(), 2020); + /// assert_eq!(doc.get_datetime("created_at")?, Some(dt)); /// assert_eq!(doc.get_datetime("bool").unwrap_err(), Error::UnexpectedType); /// assert!(doc.get_datetime("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get_datetime(&self, key: &str) -> Result>> { + pub fn get_datetime(&self, key: &str) -> Result> { self.get_with(key, RawBson::as_datetime) } + /// Gets a reference to the BSON regex value corresponding to a given key or returns an error if /// the key corresponds to a value which isn't a regex. /// /// ``` - /// # use bson::raw::{RawDocument, Error, elem}; - /// use bson::{doc, Regex}; + /// use bson::{doc, Regex, raw::{RawDocument, Error}}; /// /// let doc = RawDocument::from_document(&doc! { /// "regex": Regex { @@ -559,8 +561,7 @@ impl RawDocumentRef { /// error if the key corresponds to a value which isn't a timestamp. /// /// ``` - /// # use bson::raw::{RawDocument, elem, Error}; - /// use bson::{doc, Timestamp}; + /// use bson::{doc, Timestamp, raw::{RawDocument, Error}}; /// /// let doc = RawDocument::from_document(&doc! { /// "bool": true, @@ -604,8 +605,8 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a 64-bit integer. /// /// ``` - /// # use bson::raw::{RawDocument, elem::Element, Error}; - /// use bson::doc; + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocument}; /// /// let doc = RawDocument::from_document(&doc! 
{ /// "bool": true, @@ -624,8 +625,8 @@ impl RawDocumentRef { /// Return a reference to the contained data as a `&[u8]` /// /// ``` - /// # use bson::raw::RawDocument; - /// use bson::doc; + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocument}; /// let docbuf = RawDocument::from_document(&doc!{}); /// assert_eq!(docbuf.as_bytes(), b"\x05\x00\x00\x00\x00"); /// ``` diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 24bc108d..8b4b5e6d 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -1,9 +1,7 @@ -use std::{ - convert::{TryFrom, TryInto}, - time::Duration, -}; +use std::convert::{TryFrom, TryInto}; -use chrono::{DateTime, TimeZone, Utc}; +// use chrono::{DateTime, TimeZone, Utc}; +use crate::DateTime; #[cfg(feature = "decimal128")] use super::d128_from_slice; @@ -122,7 +120,7 @@ impl<'a> RawBson<'a> { /// Gets the ObjectId that's referenced or returns an error if the value isn't a BSON ObjectId. pub fn as_object_id(self) -> Result { if let ElementType::ObjectId = self.element_type { - Ok(ObjectId::with_bytes(self.data.try_into().map_err( + Ok(ObjectId::from_bytes(self.data.try_into().map_err( |_| Error::MalformedValue { message: "object id should be 12 bytes long".into(), }, @@ -154,26 +152,10 @@ impl<'a> RawBson<'a> { } /// Gets the DateTime that's referenced or returns an error if the value isn't a BSON DateTime. 
- pub fn as_datetime(self) -> Result> { + pub fn as_datetime(self) -> Result { if let ElementType::DateTime = self.element_type { let millis = i64_from_slice(self.data); - if millis >= 0 { - let duration = Duration::from_millis(millis as u64); - Ok(Utc.timestamp( - duration.as_secs().try_into().unwrap(), - duration.subsec_nanos(), - )) - } else { - let duration = Duration::from_millis((-millis).try_into().unwrap()); - let mut secs: i64 = duration.as_secs().try_into().unwrap(); - secs *= -1; - let mut nanos = duration.subsec_nanos(); - if nanos > 0 { - secs -= 1; - nanos = 1_000_000_000 - nanos; - } - Ok(Utc.timestamp(secs, nanos)) - } + Ok(DateTime::from_millis(millis)) } else { Err(Error::UnexpectedType) } @@ -261,7 +243,15 @@ impl<'a> RawBson<'a> { assert_eq!(self.data.len(), 16); Ok(d128_from_slice(self.data)) } else { - Err(RawError::UnexpectedType) + Err(Error::UnexpectedType) + } + } + + pub fn as_null(self) -> Result<()> { + if let ElementType::Null = self.element_type { + Ok(()) + } else { + Err(Error::UnexpectedType) } } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 40e63ee8..2aafee68 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -11,13 +11,13 @@ //! //! // See http://bsonspec.org/spec.html for details on the binary encoding of BSON. //! let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -//! let elem: Option = doc.get("hi")?; +//! let elem = doc.get("hi")?.unwrap(); //! //! assert_eq!( -//! elem?.as_str()?, +//! elem.as_str()?, //! "y'all", //! ); -//! # Ok::<(), bson::raw::RawError>(()) +//! # Ok::<(), bson::raw::Error>(()) //! ``` //! //! ### bson-rust interop @@ -48,7 +48,7 @@ //! value, //! Some("world"), //! ); -//! # Ok::<(), bson::raw::RawError>(()) +//! # Ok::<(), bson::raw::Error>(()) //! ``` //! //! ### Reference types @@ -60,22 +60,20 @@ //! //! The below example constructs a bson document in a stack-based array, //! and extracts a &str from it, performing no heap allocation. - //! 
```rust -//! use bson::raw::Doc; +//! use bson::raw::RawDocumentRef; //! //! let bytes = b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00"; //! assert_eq!(RawDocumentRef::new(bytes)?.get_str("hi")?, Some("y'all")); -//! # Ok::<(), bson::raw::RawError>(()) +//! # Ok::<(), bson::raw::Error>(()) //! ``` -//! +//! //! ### Iteration //! //! [`RawDocumentRef`] implements [`IntoIterator`](std::iter::IntoIterator), which can also be //! accessed via [`RawDocument::iter`]. //! ```rust -//! use bson::doc; //! use bson::{ //! raw::{ //! RawBson, @@ -92,14 +90,14 @@ //! let doc = RawDocument::from_document(&original_doc); //! let mut doc_iter = doc.iter(); //! -//! let (key, value): (&str, Element) = doc_iter.next().unwrap()?; +//! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?; //! assert_eq!(key, "crate"); -//! assert_eq!(value.as_str()?, "rawbson"); +//! assert_eq!(value.as_str()?, "bson"); //! -//! let (key, value): (&str, Element) = doc_iter.next().unwrap()?; +//! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?; //! assert_eq!(key, "year"); //! assert_eq!(value.as_str()?, "2021"); -//! # Ok::<(), bson::raw::RawError>(()) +//! # Ok::<(), bson::raw::Error>(()) //! 
``` mod array; diff --git a/src/raw/props.rs b/src/raw/props.rs index 23ea21fd..850dcade 100644 --- a/src/raw/props.rs +++ b/src/raw/props.rs @@ -21,9 +21,14 @@ pub(crate) fn arbitrary_bson() -> impl Strategy { any::().prop_map(Bson::Double), any::().prop_map(Bson::Int32), any::().prop_map(Bson::Int64), - any::<(String, String)>() - .prop_map(|(pattern, options)| Bson::RegularExpression(Regex { pattern, options })), - any::<[u8; 12]>().prop_map(|bytes| Bson::ObjectId(crate::oid::ObjectId::with_bytes(bytes))), + any::<(String, String)>().prop_map(|(pattern, options)| { + let mut chars: Vec<_> = options.chars().collect(); + chars.sort_unstable(); + + let options: String = chars.into_iter().collect(); + Bson::RegularExpression(Regex { pattern, options }) + }), + any::<[u8; 12]>().prop_map(|bytes| Bson::ObjectId(crate::oid::ObjectId::from_bytes(bytes))), (arbitrary_binary_subtype(), any::>()).prop_map(|(subtype, bytes)| { let bytes = if let BinarySubtype::BinaryOld = subtype { // BinarySubtype::BinaryOld expects a four byte prefix, which the bson::Bson type @@ -32,7 +37,7 @@ pub(crate) fn arbitrary_bson() -> impl Strategy { let mut newbytes = Vec::with_capacity(bytes.len() + 4); newbytes.extend_from_slice(&(bytes.len() as i32).to_le_bytes()); newbytes.extend_from_slice(&bytes); - newbytes + newbytes } else { bytes }; diff --git a/src/raw/test.rs b/src/raw/test.rs index 9a801198..669ca4a1 100644 --- a/src/raw/test.rs +++ b/src/raw/test.rs @@ -1,6 +1,16 @@ use super::*; -use crate::{doc, spec::BinarySubtype, Binary, Bson, JavaScriptCodeWithScope, Regex, Timestamp}; -use chrono::TimeZone; +use crate::{ + doc, + oid::ObjectId, + spec::{BinarySubtype, ElementType}, + Binary, + Bson, + DateTime, + JavaScriptCodeWithScope, + Regex, + Timestamp, +}; +use chrono::{TimeZone, Utc}; fn to_bytes(doc: &crate::Document) -> Vec { let mut docbytes = Vec::new(); @@ -76,9 +86,9 @@ fn rawdoc_to_doc() { "document": {}, "array": ["binary", "serialized", "object", "notation"], "binary": 
Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, - "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), + "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), "boolean": true, - "datetime": Utc::now(), + "datetime": DateTime::now(), "null": Bson::Null, "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), @@ -124,6 +134,7 @@ fn string() { "hello", ); } + #[test] fn document() { let rawdoc = RawDocument::from_document(&doc! {"document": {}}); @@ -134,7 +145,7 @@ fn document() { .expect("no key document") .as_document() .expect("result was not a document"); - assert_eq!(&doc.data, [5, 0, 0, 0, 0].as_ref()); // Empty document + assert_eq!(doc.as_bytes(), [5u8, 0, 0, 0, 0].as_ref()); // Empty document } #[test] @@ -172,7 +183,7 @@ fn binary() { #[test] fn object_id() { let rawdoc = RawDocument::from_document(&doc! { - "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), }); let oid = rawdoc .get("object_id") @@ -203,7 +214,7 @@ fn boolean() { fn datetime() { let rawdoc = RawDocument::from_document(&doc! { "boolean": true, - "datetime": Utc.ymd(2000,10,31).and_hms(12, 30, 45), + "datetime": DateTime::from_chrono(Utc.ymd(2000,10,31).and_hms(12, 30, 45)), }); let datetime = rawdoc .get("datetime") @@ -211,7 +222,7 @@ fn datetime() { .expect("no key datetime") .as_datetime() .expect("result was not datetime"); - assert_eq!(datetime.to_rfc3339(), "2000-10-31T12:30:45+00:00"); + assert_eq!(datetime.to_rfc3339(), "2000-10-31T12:30:45Z"); } #[test] @@ -275,14 +286,15 @@ fn javascript_with_scope() { let rawdoc = RawDocument::from_document(&doc! 
{ "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), }); - let (js, scopedoc) = rawdoc + let js_with_scope = rawdoc .get("javascript_with_scope") .expect("error finding key javascript_with_scope") .expect("no key javascript_with_scope") .as_javascript_with_scope() .expect("was not javascript with scope"); - assert_eq!(js, "console.log(msg);"); - let (scope_key, scope_value_bson) = scopedoc + assert_eq!(js_with_scope.code(), "console.log(msg);"); + let (scope_key, scope_value_bson) = js_with_scope + .scope() .into_iter() .next() .expect("no next value in scope") @@ -337,15 +349,15 @@ fn int64() { } #[test] fn document_iteration() { - let docbytes = to_bytes(&doc! { + let doc = doc! { "f64": 2.5, "string": "hello", "document": {}, "array": ["binary", "serialized", "object", "notation"], "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, - "object_id": ObjectId::with_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), + "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), "boolean": true, - "datetime": Utc::now(), + "datetime": DateTime::now(), "null": Bson::Null, "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), @@ -355,13 +367,14 @@ fn document_iteration() { "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 0 }), "int64": 46i64, "end": "END", - }); - let rawdoc = unsafe { RawDocumentRef::new_unchecked(&docbytes) }; + }; + let rawdoc = RawDocument::from_document(&doc); + let rawdocref = rawdoc.as_ref(); assert_eq!( - rawdoc + rawdocref .into_iter() - .collect::, Error>>() + .collect::>>() .expect("collecting iterated doc") .len(), 17 @@ -382,7 +395,7 @@ fn into_bson_conversion() { "string": "hello", "document": {}, "array": ["binary", "serialized", "object", "notation"], - "object_id": 
ObjectId::with_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, "boolean": false, }); @@ -409,7 +422,7 @@ fn into_bson_conversion() { ); assert_eq!( *doc.get("object_id").expect("object_id not found"), - Bson::ObjectId(ObjectId::with_bytes([ + Bson::ObjectId(ObjectId::from_bytes([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ])) ); @@ -426,18 +439,10 @@ fn into_bson_conversion() { ); } +use super::props::arbitrary_bson; use proptest::prelude::*; use std::convert::TryInto; -use super::{props::arbitrary_bson, RawDocument}; -use crate::doc; - -fn to_bytes(doc: &crate::Document) -> Vec { - let mut docbytes = Vec::new(); - doc.to_writer(&mut docbytes).unwrap(); - docbytes -} - proptest! { #[test] fn no_crashes(s: Vec) { @@ -452,7 +457,7 @@ proptest! { let raw = RawDocument::new(raw); prop_assert!(raw.is_ok()); let raw = raw.unwrap(); - let roundtrip: Result = raw.try_into(); + let roundtrip: Result = raw.try_into(); prop_assert!(roundtrip.is_ok()); let roundtrip = roundtrip.unwrap(); prop_assert_eq!(doc, roundtrip); From e02b45b5138e80eaf693b7d5d227788a205a072e Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 4 Oct 2021 19:49:10 -0400 Subject: [PATCH 12/48] fix clippy --- src/raw/test.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/raw/test.rs b/src/raw/test.rs index 669ca4a1..e1a26fbf 100644 --- a/src/raw/test.rs +++ b/src/raw/test.rs @@ -207,7 +207,7 @@ fn boolean() { .as_bool() .expect("result was not boolean"); - assert_eq!(boolean, true); + assert!(boolean); } #[test] @@ -301,7 +301,7 @@ fn javascript_with_scope() { .expect("invalid element"); assert_eq!(scope_key, "ok"); let scope_value = scope_value_bson.as_bool().expect("not a boolean"); - assert_eq!(scope_value, true); + assert!(scope_value); } #[test] From 23e8d6dcf5dcd5ef249b4f1c85a3179f9064faa1 Mon Sep 17 00:00:00 2001 From: 
Patrick Freed Date: Wed, 6 Oct 2021 15:16:12 -0400 Subject: [PATCH 13/48] include expected and unexpected types in Error::UnexpectedType --- src/raw/doc.rs | 22 ++--- src/raw/elem.rs | 239 +++++++++++++++++++---------------------------- src/raw/error.rs | 16 +++- 3 files changed, 123 insertions(+), 154 deletions(-) diff --git a/src/raw/doc.rs b/src/raw/doc.rs index f7badee9..f690a34a 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -370,7 +370,7 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); - /// assert_eq!(doc.get_f64("bool"), Err(Error::UnexpectedType)); + /// assert!(matches!(doc.get_f64("bool"), Err(Error::UnexpectedType { .. }))); /// assert_eq!(doc.get_f64("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` @@ -390,7 +390,7 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); - /// assert_eq!(doc.get_str("bool"), Err(Error::UnexpectedType)); + /// assert!(matches!(doc.get_str("bool"), Err(Error::UnexpectedType { .. }))); /// assert_eq!(doc.get_str("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` @@ -411,7 +411,7 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); - /// assert_eq!(doc.get_document("bool").unwrap_err(), Error::UnexpectedType); + /// assert!(matches!(doc.get_document("bool").unwrap_err(), Error::UnexpectedType { .. })); /// assert!(doc.get_document("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -462,7 +462,7 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_binary("binary")?.map(RawBinary::as_bytes), Some(&[1, 2, 3][..])); - /// assert_eq!(doc.get_binary("bool").unwrap_err(), Error::UnexpectedType); + /// assert!(matches!(doc.get_binary("bool").unwrap_err(), Error::UnexpectedType { .. 
})); /// assert!(doc.get_binary("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -483,7 +483,7 @@ impl RawDocumentRef { /// }); /// /// let oid = doc.get_object_id("_id")?.unwrap(); - /// assert_eq!(doc.get_object_id("bool").unwrap_err(), Error::UnexpectedType); + /// assert!(matches!(doc.get_object_id("bool").unwrap_err(), Error::UnexpectedType { .. })); /// assert!(doc.get_object_id("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -504,7 +504,7 @@ impl RawDocumentRef { /// }); /// /// assert!(doc.get_bool("bool")?.unwrap()); - /// assert_eq!(doc.get_bool("_id").unwrap_err(), Error::UnexpectedType); + /// assert!(matches!(doc.get_bool("_id").unwrap_err(), Error::UnexpectedType { .. })); /// assert!(doc.get_object_id("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -525,7 +525,7 @@ impl RawDocumentRef { /// "bool": true, /// }); /// assert_eq!(doc.get_datetime("created_at")?, Some(dt)); - /// assert_eq!(doc.get_datetime("bool").unwrap_err(), Error::UnexpectedType); + /// assert!(matches!(doc.get_datetime("bool").unwrap_err(), Error::UnexpectedType { .. })); /// assert!(doc.get_datetime("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -549,7 +549,7 @@ impl RawDocumentRef { /// /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); - /// assert_eq!(doc.get_regex("bool").unwrap_err(), Error::UnexpectedType); + /// assert!(matches!(doc.get_regex("bool").unwrap_err(), Error::UnexpectedType { .. })); /// assert!(doc.get_regex("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -572,7 +572,7 @@ impl RawDocumentRef { /// /// assert_eq!(timestamp.time(), 649876543); /// assert_eq!(timestamp.increment(), 9); - /// assert_eq!(doc.get_timestamp("bool"), Err(Error::UnexpectedType)); + /// assert!(matches!(doc.get_timestamp("bool"), Err(Error::UnexpectedType { .. 
}))); /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` @@ -593,7 +593,7 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); - /// assert_eq!(doc.get_i32("bool"), Err(Error::UnexpectedType)); + /// assert!(matches!(doc.get_i32("bool"), Err(Error::UnexpectedType { .. }))); /// assert_eq!(doc.get_i32("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` @@ -614,7 +614,7 @@ impl RawDocumentRef { /// }); /// /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); - /// assert_eq!(doc.get_i64("bool"), Err(Error::UnexpectedType)); + /// assert!(matches!(doc.get_i64("bool"), Err(Error::UnexpectedType { .. }))); /// assert_eq!(doc.get_i64("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 8b4b5e6d..80e17091 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -1,7 +1,7 @@ use std::convert::{TryFrom, TryInto}; // use chrono::{DateTime, TimeZone, Utc}; -use crate::DateTime; +use crate::{DateTime, Decimal128}; #[cfg(feature = "decimal128")] use super::d128_from_slice; @@ -44,218 +44,179 @@ impl<'a> RawBson<'a> { self.data } + fn validate_type(self, expected: ElementType) -> Result<()> { + if self.element_type != expected { + return Err(Error::UnexpectedType { + actual: self.element_type, + expected, + }); + } + Ok(()) + } + /// Gets the f64 that's referenced or returns an error if the value isn't a BSON double. 
pub fn as_f64(self) -> Result { - if let ElementType::Double = self.element_type { - Ok(f64::from_bits(u64::from_le_bytes( - self.data.try_into().map_err(|_| Error::MalformedValue { - message: "f64 should be 8 bytes long".into(), - })?, - ))) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::Double)?; + Ok(f64::from_bits(u64::from_le_bytes( + self.data.try_into().map_err(|_| Error::MalformedValue { + message: "f64 should be 8 bytes long".into(), + })?, + ))) } /// Gets the string that's referenced or returns an error if the value isn't a BSON string. pub fn as_str(self) -> Result<&'a str> { - if let ElementType::String = self.element_type { - read_lenencoded(self.data) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::String)?; + read_lenencoded(self.data) } /// Gets the document that's referenced or returns an error if the value isn't a BSON document. pub fn as_document(self) -> Result<&'a RawDocumentRef> { - if let ElementType::EmbeddedDocument = self.element_type { - RawDocumentRef::new(self.data) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::EmbeddedDocument)?; + RawDocumentRef::new(self.data) } /// Gets the array that's referenced or returns an error if the value isn't a BSON array. pub fn as_array(self) -> Result<&'a RawArray> { - if let ElementType::Array = self.element_type { - RawArray::new(self.data) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::Array)?; + RawArray::new(self.data) } /// Gets the BSON binary value that's referenced or returns an error if the value a BSON binary. 
pub fn as_binary(self) -> Result> { - if let ElementType::Binary = self.element_type { - let length = i32_from_slice(&self.data[0..4]); - let subtype = BinarySubtype::from(self.data[4]); // TODO: This mishandles reserved values - if self.data.len() as i32 != length + 5 { - return Err(Error::MalformedValue { - message: "binary bson has wrong declared length".into(), - }); - } - let data = match subtype { - BinarySubtype::BinaryOld => { - if length < 4 { - return Err(Error::MalformedValue { - message: "old binary subtype has no inner declared length".into(), - }); - } - let oldlength = i32_from_slice(&self.data[5..9]); - if oldlength + 4 != length { - return Err(Error::MalformedValue { - message: "old binary subtype has wrong inner declared length".into(), - }); - } - &self.data[9..] - } - _ => &self.data[5..], - }; - Ok(RawBinary::new(subtype, data)) - } else { - Err(Error::UnexpectedType) + self.validate_type(ElementType::Binary)?; + + let length = i32_from_slice(&self.data[0..4]); + let subtype = BinarySubtype::from(self.data[4]); // TODO: This mishandles reserved values + if self.data.len() as i32 != length + 5 { + return Err(Error::MalformedValue { + message: "binary bson has wrong declared length".into(), + }); } + let data = match subtype { + BinarySubtype::BinaryOld => { + if length < 4 { + return Err(Error::MalformedValue { + message: "old binary subtype has no inner declared length".into(), + }); + } + let oldlength = i32_from_slice(&self.data[5..9]); + if oldlength + 4 != length { + return Err(Error::MalformedValue { + message: "old binary subtype has wrong inner declared length".into(), + }); + } + &self.data[9..] + } + _ => &self.data[5..], + }; + Ok(RawBinary::new(subtype, data)) } /// Gets the ObjectId that's referenced or returns an error if the value isn't a BSON ObjectId. 
pub fn as_object_id(self) -> Result { - if let ElementType::ObjectId = self.element_type { - Ok(ObjectId::from_bytes(self.data.try_into().map_err( - |_| Error::MalformedValue { - message: "object id should be 12 bytes long".into(), - }, - )?)) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::ObjectId)?; + Ok(ObjectId::from_bytes(self.data.try_into().map_err( + |_| Error::MalformedValue { + message: "object id should be 12 bytes long".into(), + }, + )?)) } /// Gets the boolean that's referenced or returns an error if the value isn't a BSON boolean. pub fn as_bool(self) -> Result { - if let ElementType::Boolean = self.element_type { - if self.data.len() != 1 { - Err(Error::MalformedValue { - message: "boolean has length != 1".into(), - }) - } else { - match self.data[0] { - 0 => Ok(false), - 1 => Ok(true), - _ => Err(Error::MalformedValue { - message: "boolean value was not 0 or 1".into(), - }), - } - } + self.validate_type(ElementType::Boolean)?; + if self.data.len() != 1 { + Err(Error::MalformedValue { + message: "boolean has length != 1".into(), + }) } else { - Err(Error::UnexpectedType) + match self.data[0] { + 0 => Ok(false), + 1 => Ok(true), + _ => Err(Error::MalformedValue { + message: "boolean value was not 0 or 1".into(), + }), + } } } /// Gets the DateTime that's referenced or returns an error if the value isn't a BSON DateTime. pub fn as_datetime(self) -> Result { - if let ElementType::DateTime = self.element_type { - let millis = i64_from_slice(self.data); - Ok(DateTime::from_millis(millis)) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::DateTime)?; + let millis = i64_from_slice(self.data); + Ok(DateTime::from_millis(millis)) } /// Gets the regex that's referenced or returns an error if the value isn't a BSON regex. 
pub fn as_regex(self) -> Result> { - if let ElementType::RegularExpression = self.element_type { - RawRegex::new(self.data) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::RegularExpression)?; + RawRegex::new(self.data) } /// Gets the BSON JavaScript code that's referenced or returns an error if the value isn't BSON /// JavaScript code. pub fn as_javascript(self) -> Result<&'a str> { - if let ElementType::JavaScriptCode = self.element_type { - read_lenencoded(self.data) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::JavaScriptCode)?; + read_lenencoded(self.data) } /// Gets the symbol that's referenced or returns an error if the value isn't a BSON symbol. pub fn as_symbol(self) -> Result<&'a str> { - if let ElementType::Symbol = self.element_type { - read_lenencoded(self.data) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::Symbol)?; + read_lenencoded(self.data) } /// Gets the BSON JavaScript code with scope that's referenced or returns an error if the value /// isn't BSON JavaScript code with scope. pub fn as_javascript_with_scope(self) -> Result> { - if let ElementType::JavaScriptCodeWithScope = self.element_type { - let length = i32_from_slice(&self.data[..4]); - assert_eq!(self.data.len() as i32, length); + self.validate_type(ElementType::JavaScriptCodeWithScope)?; + let length = i32_from_slice(&self.data[..4]); + assert_eq!(self.data.len() as i32, length); - let code = read_lenencoded(&self.data[4..])?; - let scope = RawDocumentRef::new(&self.data[9 + code.len()..])?; + let code = read_lenencoded(&self.data[4..])?; + let scope = RawDocumentRef::new(&self.data[9 + code.len()..])?; - Ok(RawJavaScriptCodeWithScope { code, scope }) - } else { - Err(Error::UnexpectedType) - } + Ok(RawJavaScriptCodeWithScope { code, scope }) } /// Gets the timestamp that's referenced or returns an error if the value isn't a BSON /// timestamp. 
pub fn as_timestamp(self) -> Result> { - if let ElementType::Timestamp = self.element_type { - assert_eq!(self.data.len(), 8); - Ok(RawTimestamp { data: self.data }) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::Timestamp)?; + assert_eq!(self.data.len(), 8); + Ok(RawTimestamp { data: self.data }) } /// Gets the i32 that's referenced or returns an error if the value isn't a BSON int32. pub fn as_i32(self) -> Result { - if let ElementType::Int32 = self.element_type { - assert_eq!(self.data.len(), 4); - Ok(i32_from_slice(self.data)) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::Int32)?; + assert_eq!(self.data.len(), 4); + Ok(i32_from_slice(self.data)) } /// Gets the i64 that's referenced or returns an error if the value isn't a BSON int64. pub fn as_i64(self) -> Result { - if let ElementType::Int64 = self.element_type { - assert_eq!(self.data.len(), 8); - Ok(i64_from_slice(self.data)) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::Int64)?; + assert_eq!(self.data.len(), 8); + Ok(i64_from_slice(self.data)) } /// Gets the decimal that's referenced or returns an error if the value isn't a BSON Decimal128. 
- #[cfg(feature = "decimal128")] pub fn as_decimal128(self) -> Result { - if let ElementType::Decimal128 = self.element_type { - assert_eq!(self.data.len(), 16); - Ok(d128_from_slice(self.data)) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::Decimal128)?; + let bytes: [u8; 128 / 8] = self.data.try_into().map_err(|_| Error::MalformedValue { + message: format!("decimal128 value has invalid length: {}", self.data.len()), + })?; + Ok(Decimal128::from_bytes(bytes)) } pub fn as_null(self) -> Result<()> { - if let ElementType::Null = self.element_type { - Ok(()) - } else { - Err(Error::UnexpectedType) - } + self.validate_type(ElementType::Null) } } +// TODO: finish implementation impl<'a> TryFrom> for Bson { type Error = Error; @@ -315,11 +276,7 @@ impl<'a> TryFrom> for Bson { scope: scope.try_into()?, }) } - #[cfg(feature = "decimal128")] ElementType::Decimal128 => Bson::Decimal128(rawbson.as_decimal128()?), - - #[cfg(not(feature = "decimal128"))] - ElementType::Decimal128 => return Err(Error::UnexpectedType), ElementType::MaxKey => unimplemented!(), ElementType::MinKey => unimplemented!(), }) diff --git a/src/raw/error.rs b/src/raw/error.rs index 269b3d29..8b62fe00 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -1,10 +1,18 @@ +use crate::spec::ElementType; + /// An error that occurs when attempting to parse raw BSON bytes. #[derive(Debug, PartialEq)] +#[non_exhaustive] pub enum Error { /// A BSON value did not fit the expected type. - UnexpectedType, + #[non_exhaustive] + UnexpectedType { + actual: ElementType, + expected: ElementType, + }, /// A BSON value did not fit the proper format. + #[non_exhaustive] MalformedValue { message: String }, /// Improper UTF-8 bytes were found when proper UTF-8 was expected. 
The error value contains @@ -15,7 +23,11 @@ pub enum Error { impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { - Self::UnexpectedType => write!(f, "unexpected type"), + Self::UnexpectedType { actual, expected } => write!( + f, + "unexpected element type: {:?}, expected: {:?}", + actual, expected + ), Self::MalformedValue { message } => write!(f, "malformed value: {:?}", message), Self::Utf8EncodingError(_) => write!(f, "utf-8 encoding error"), } From 1769a145120ab877995de0579ada23b71c1b360b Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 15:23:22 -0400 Subject: [PATCH 14/48] fix typo --- src/raw/array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index b58db623..06b0745e 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -150,7 +150,7 @@ impl<'a> IntoIterator for &'a RawArray { } } -/// An iterator over borrwed raw BSON array values. +/// An iterator over borrowed raw BSON array values. 
pub struct RawArrayIter<'a> { inner: RawDocumentIter<'a>, } From 25a68c12af961703734efa744ea24c667195b202 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 15:25:53 -0400 Subject: [PATCH 15/48] move props definitions to test submodule --- src/raw/mod.rs | 2 -- src/raw/{test.rs => test/mod.rs} | 4 +++- src/raw/{ => test}/props.rs | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename src/raw/{test.rs => test/mod.rs} (99%) rename src/raw/{ => test}/props.rs (100%) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 2aafee68..3fc231b6 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -105,8 +105,6 @@ mod doc; mod elem; mod error; #[cfg(test)] -mod props; -#[cfg(test)] mod test; use std::convert::TryInto; diff --git a/src/raw/test.rs b/src/raw/test/mod.rs similarity index 99% rename from src/raw/test.rs rename to src/raw/test/mod.rs index e1a26fbf..8d928d20 100644 --- a/src/raw/test.rs +++ b/src/raw/test/mod.rs @@ -1,3 +1,5 @@ +mod props; + use super::*; use crate::{ doc, @@ -439,7 +441,7 @@ fn into_bson_conversion() { ); } -use super::props::arbitrary_bson; +use props::arbitrary_bson; use proptest::prelude::*; use std::convert::TryInto; diff --git a/src/raw/props.rs b/src/raw/test/props.rs similarity index 100% rename from src/raw/props.rs rename to src/raw/test/props.rs From 8578553b5c4b0864fa75fbb79c1b91d0ffd22369 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 15:59:50 -0400 Subject: [PATCH 16/48] return errors instead of panicking in certain places --- src/raw/doc.rs | 224 +++++++++++++++++++++++++----------------------- src/raw/elem.rs | 25 +++--- src/raw/mod.rs | 45 ++++++---- 3 files changed, 158 insertions(+), 136 deletions(-) diff --git a/src/raw/doc.rs b/src/raw/doc.rs index f690a34a..8f5917f1 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -84,7 +84,7 @@ impl RawDocument { }); } - let length = i32_from_slice(&data[..4]); + let length = i32_from_slice(&data[..4])?; if data.len() as i32 != length { return 
Err(Error::MalformedValue { @@ -275,7 +275,7 @@ impl RawDocumentRef { }); } - let length = i32_from_slice(&data[..4]); + let length = i32_from_slice(&data[..4])?; if data.len() as i32 != length { return Err(Error::MalformedValue { @@ -692,134 +692,140 @@ impl<'a> Iterator for RawDocumentIter<'a> { } } - let key = match read_nullterminated(&self.doc.data[self.offset + 1..]) { - Ok(key) => key, - Err(err) => return Some(Err(err)), - }; + // helper function to ease the use of the `?` operator + fn read_next<'a>(iter: &mut RawDocumentIter<'a>) -> Result<(&'a str, RawBson<'a>)> { + let key = read_nullterminated(&iter.doc.data[iter.offset + 1..])?; - let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 + let valueoffset = iter.offset + 1 + key.len() + 1; // type specifier + key + \0 - let element_type = match ElementType::from(self.doc.data[self.offset]) { - Some(et) => et, - None => { - return Some(Err(Error::MalformedValue { - message: format!("invalid tag: {}", self.doc.data[self.offset]), - })) - } - }; - - let element_size = match element_type { - ElementType::Double => 8, - ElementType::String => { - let size = - 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; - - if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(Error::MalformedValue { - message: "string not null terminated".into(), - })); + let element_type = match ElementType::from(iter.doc.data[iter.offset]) { + Some(et) => et, + None => { + return Err(Error::MalformedValue { + message: format!("invalid tag: {}", iter.doc.data[iter.offset]), + }) } + }; - size - } - ElementType::EmbeddedDocument => { - let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + let element_size = match element_type { + ElementType::Double => 8, + ElementType::String => { + let size = + 4 + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? 
as usize; - if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(Error::MalformedValue { - message: "document not null terminated".into(), - })); + if iter.doc.data[valueoffset + size - 1] != 0 { + return Err(Error::MalformedValue { + message: "string not null terminated".into(), + }); + } + + size } + ElementType::EmbeddedDocument => { + let size = + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize; - size - } - ElementType::Array => { - let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if iter.doc.data[valueoffset + size - 1] != 0 { + return Err(Error::MalformedValue { + message: "document not null terminated".into(), + }); + } - if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(Error::MalformedValue { - message: "array not null terminated".into(), - })); + size } + ElementType::Array => { + let size = + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize; - size - } - ElementType::Binary => { - 5 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize - } - ElementType::Undefined => 0, - ElementType::ObjectId => 12, - ElementType::Boolean => 1, - ElementType::DateTime => 8, - ElementType::Null => 0, - ElementType::RegularExpression => { - let regex = match read_nullterminated(&self.doc.data[valueoffset..]) { - Ok(regex) => regex, - Err(err) => return Some(Err(err)), - }; - - let options = - match read_nullterminated(&self.doc.data[valueoffset + regex.len() + 1..]) { + if iter.doc.data[valueoffset + size - 1] != 0 { + return Err(Error::MalformedValue { + message: "array not null terminated".into(), + }); + } + + size + } + ElementType::Binary => { + 5 + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? 
as usize + } + ElementType::Undefined => 0, + ElementType::ObjectId => 12, + ElementType::Boolean => 1, + ElementType::DateTime => 8, + ElementType::Null => 0, + ElementType::RegularExpression => { + let regex = match read_nullterminated(&iter.doc.data[valueoffset..]) { + Ok(regex) => regex, + Err(err) => return Err(err), + }; + + let options = match read_nullterminated( + &iter.doc.data[valueoffset + regex.len() + 1..], + ) { Ok(options) => options, - Err(err) => return Some(Err(err)), + Err(err) => return Err(err), }; - regex.len() + options.len() + 2 - } - ElementType::DbPointer => { - let string_size = - 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + regex.len() + options.len() + 2 + } + ElementType::DbPointer => { + let string_size = + 4 + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize; - let id_size = 12; + let id_size = 12; - if self.doc.data[valueoffset + string_size - 1] != 0 { - return Some(Err(Error::MalformedValue { - message: "DBPointer string not null-terminated".into(), - })); + if iter.doc.data[valueoffset + string_size - 1] != 0 { + return Err(Error::MalformedValue { + message: "DBPointer string not null-terminated".into(), + }); + } + + string_size + id_size } + ElementType::JavaScriptCode => { + let size = + 4 + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize; - string_size + id_size - } - ElementType::JavaScriptCode => { - let size = - 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; - - if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(Error::MalformedValue { - message: "javascript code not null-terminated".into(), - })); + if iter.doc.data[valueoffset + size - 1] != 0 { + return Err(Error::MalformedValue { + message: "javascript code not null-terminated".into(), + }); + } + + size } + ElementType::Symbol => { + 4 + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? 
as usize + } + ElementType::JavaScriptCodeWithScope => { + let size = + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize; - size - } - ElementType::Symbol => { - 4 + i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize - } - ElementType::JavaScriptCodeWithScope => { - let size = i32_from_slice(&self.doc.data[valueoffset..valueoffset + 4]) as usize; + if iter.doc.data[valueoffset + size - 1] != 0 { + return Err(Error::MalformedValue { + message: "javascript with scope not null-terminated".into(), + }); + } - if self.doc.data[valueoffset + size - 1] != 0 { - return Some(Err(Error::MalformedValue { - message: "javascript with scope not null-terminated".into(), - })); + size } + ElementType::Int32 => 4, + ElementType::Timestamp => 8, + ElementType::Int64 => 8, + ElementType::Decimal128 => 16, + ElementType::MaxKey => 0, + ElementType::MinKey => 0, + }; + + let nextoffset = valueoffset + element_size; + iter.offset = nextoffset; + + Ok(( + key, + RawBson::new(element_type, &iter.doc.data[valueoffset..nextoffset]), + )) + } - size - } - ElementType::Int32 => 4, - ElementType::Timestamp => 8, - ElementType::Int64 => 8, - ElementType::Decimal128 => 16, - ElementType::MaxKey => 0, - ElementType::MinKey => 0, - }; - - let nextoffset = valueoffset + element_size; - self.offset = nextoffset; - - Some(Ok(( - key, - RawBson::new(element_type, &self.doc.data[valueoffset..nextoffset]), - ))) + Some(read_next(self)) } } diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 80e17091..e90a1526 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -86,7 +86,7 @@ impl<'a> RawBson<'a> { pub fn as_binary(self) -> Result> { self.validate_type(ElementType::Binary)?; - let length = i32_from_slice(&self.data[0..4]); + let length = i32_from_slice(&self.data[0..4])?; let subtype = BinarySubtype::from(self.data[4]); // TODO: This mishandles reserved values if self.data.len() as i32 != length + 5 { return Err(Error::MalformedValue { @@ -100,7 +100,7 @@ 
impl<'a> RawBson<'a> { message: "old binary subtype has no inner declared length".into(), }); } - let oldlength = i32_from_slice(&self.data[5..9]); + let oldlength = i32_from_slice(&self.data[5..9])?; if oldlength + 4 != length { return Err(Error::MalformedValue { message: "old binary subtype has wrong inner declared length".into(), @@ -144,7 +144,7 @@ impl<'a> RawBson<'a> { /// Gets the DateTime that's referenced or returns an error if the value isn't a BSON DateTime. pub fn as_datetime(self) -> Result { self.validate_type(ElementType::DateTime)?; - let millis = i64_from_slice(self.data); + let millis = i64_from_slice(self.data)?; Ok(DateTime::from_millis(millis)) } @@ -171,8 +171,13 @@ impl<'a> RawBson<'a> { /// isn't BSON JavaScript code with scope. pub fn as_javascript_with_scope(self) -> Result> { self.validate_type(ElementType::JavaScriptCodeWithScope)?; - let length = i32_from_slice(&self.data[..4]); - assert_eq!(self.data.len() as i32, length); + let length = i32_from_slice(&self.data[..4])?; + + if (self.data.len() as i32) != length { + return Err(Error::MalformedValue { + message: "".to_string(), + }); + } let code = read_lenencoded(&self.data[4..])?; let scope = RawDocumentRef::new(&self.data[9 + code.len()..])?; @@ -191,15 +196,13 @@ impl<'a> RawBson<'a> { /// Gets the i32 that's referenced or returns an error if the value isn't a BSON int32. pub fn as_i32(self) -> Result { self.validate_type(ElementType::Int32)?; - assert_eq!(self.data.len(), 4); - Ok(i32_from_slice(self.data)) + i32_from_slice(self.data) } /// Gets the i64 that's referenced or returns an error if the value isn't a BSON int64. pub fn as_i64(self) -> Result { self.validate_type(ElementType::Int64)?; - assert_eq!(self.data.len(), 8); - Ok(i64_from_slice(self.data)) + i64_from_slice(self.data) } /// Gets the decimal that's referenced or returns an error if the value isn't a BSON Decimal128. 
@@ -351,14 +354,14 @@ impl<'a> RawTimestamp<'a> { pub fn time(&self) -> u32 { // RawBsonTimestamp can only be constructed with the correct data length, so this should // always succeed. - u32_from_slice(&self.data[4..8]) + u32_from_slice(&self.data[4..8]).unwrap() } /// Gets the increment portion of the timestamp. pub fn increment(&self) -> u32 { // RawBsonTimestamp can only be constructed with the correct data length, so this should // always succeed. - u32_from_slice(&self.data[0..4]) + u32_from_slice(&self.data[0..4]).unwrap() } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 3fc231b6..a0277b9e 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -115,34 +115,39 @@ pub use self::{ elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex, RawTimestamp}, error::{Error, Result}, }; + /// Given a 4 byte u8 slice, return an i32 calculated from the bytes in /// little endian order /// /// # Panics /// /// This function panics if given a slice that is not four bytes long. -fn i32_from_slice(val: &[u8]) -> i32 { - i32::from_le_bytes(val.try_into().expect("i32 is four bytes")) +fn i32_from_slice(val: &[u8]) -> Result { + Ok(i32::from_le_bytes(val.try_into().map_err(|_| { + Error::MalformedValue { + message: format!("expected 4 bytes to read i32, instead got {}", val.len()), + } + })?)) } /// Given an 8 byte u8 slice, return an i64 calculated from the bytes in /// little endian order -/// -/// # Panics -/// -/// This function panics if given a slice that is not eight bytes long. 
-fn i64_from_slice(val: &[u8]) -> i64 { - i64::from_le_bytes(val.try_into().expect("i64 is eight bytes")) +fn i64_from_slice(val: &[u8]) -> Result { + Ok(i64::from_le_bytes(val.try_into().map_err(|_| { + Error::MalformedValue { + message: format!("expected 8 bytes to read i64, instead got {}", val.len()), + } + })?)) } /// Given a 4 byte u8 slice, return a u32 calculated from the bytes in /// little endian order -/// -/// # Panics -/// -/// This function panics if given a slice that is not four bytes long. -fn u32_from_slice(val: &[u8]) -> u32 { - u32::from_le_bytes(val.try_into().expect("u32 is four bytes")) +fn u32_from_slice(val: &[u8]) -> Result { + Ok(u32::from_le_bytes(val.try_into().map_err(|_| { + Error::MalformedValue { + message: format!("expected 4 bytes to read u32, instead got {}", val.len()), + } + })?)) } #[cfg(feature = "decimal128")] @@ -168,8 +173,16 @@ fn read_nullterminated(buf: &[u8]) -> Result<&str> { } fn read_lenencoded(buf: &[u8]) -> Result<&str> { - let length = i32_from_slice(&buf[..4]); - assert!(buf.len() as i32 >= length + 4); + let length = i32_from_slice(&buf[..4])?; + if (buf.len() as i32) < length + 4 { + return Err(Error::MalformedValue { + message: format!( + "expected buffer to contain at least {} bytes, but it only has {}", + length + 4, + buf.len() + ), + }); + } try_to_str(&buf[4..4 + length as usize - 1]) } From c9c14c482fb66bb8aa2b5b103f41882f30db1f61 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 16:13:31 -0400 Subject: [PATCH 17/48] share read_bool code --- src/de/mod.rs | 2 +- src/raw/elem.rs | 15 ++++++--------- src/raw/mod.rs | 8 -------- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 8bb977a2..8a20a342 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -108,7 +108,7 @@ pub(crate) fn read_string(reader: &mut R, utf8_lossy: bool) -> Ok(s) } -fn read_bool(mut reader: R) -> Result { +pub(crate) fn read_bool(mut reader: R) -> Result { let val = 
read_u8(&mut reader)?; if val > 1 { return Err(Error::invalid_value( diff --git a/src/raw/elem.rs b/src/raw/elem.rs index e90a1526..45c45f6e 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -1,7 +1,7 @@ use std::convert::{TryFrom, TryInto}; // use chrono::{DateTime, TimeZone, Utc}; -use crate::{DateTime, Decimal128}; +use crate::{de::read_bool, DateTime, Decimal128}; #[cfg(feature = "decimal128")] use super::d128_from_slice; @@ -87,7 +87,7 @@ impl<'a> RawBson<'a> { self.validate_type(ElementType::Binary)?; let length = i32_from_slice(&self.data[0..4])?; - let subtype = BinarySubtype::from(self.data[4]); // TODO: This mishandles reserved values + let subtype = BinarySubtype::from(self.data[4]); if self.data.len() as i32 != length + 5 { return Err(Error::MalformedValue { message: "binary bson has wrong declared length".into(), @@ -131,13 +131,9 @@ impl<'a> RawBson<'a> { message: "boolean has length != 1".into(), }) } else { - match self.data[0] { - 0 => Ok(false), - 1 => Ok(true), - _ => Err(Error::MalformedValue { - message: "boolean value was not 0 or 1".into(), - }), - } + read_bool(self.data).map_err(|e| Error::MalformedValue { + message: e.to_string(), + }) } } @@ -214,6 +210,7 @@ impl<'a> RawBson<'a> { Ok(Decimal128::from_bytes(bytes)) } + /// Gets the null value that's referenced or returns an error if the value isn't a BSON null. 
pub fn as_null(self) -> Result<()> { self.validate_type(ElementType::Null) } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index a0277b9e..a4520af8 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -150,14 +150,6 @@ fn u32_from_slice(val: &[u8]) -> Result { })?)) } -#[cfg(feature = "decimal128")] -fn d128_from_slice(val: &[u8]) -> Decimal128 { - // TODO: Handle Big Endian platforms - let d = - unsafe { decimal::d128::from_raw_bytes(val.try_into().expect("d128 is sixteen bytes")) }; - Decimal128::from(d) -} - fn read_nullterminated(buf: &[u8]) -> Result<&str> { let mut splits = buf.splitn(2, |x| *x == 0); let value = splits.next().ok_or_else(|| Error::MalformedValue { From 4fc4b6550bed26e4ef5e20473b2edf800a509d99 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 16:15:17 -0400 Subject: [PATCH 18/48] wrap Utf8EncodingError --- src/raw/error.rs | 6 ++++-- src/raw/mod.rs | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/raw/error.rs b/src/raw/error.rs index 8b62fe00..f2cf70ad 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -1,3 +1,5 @@ +use std::str::Utf8Error; + use crate::spec::ElementType; /// An error that occurs when attempting to parse raw BSON bytes. @@ -17,7 +19,7 @@ pub enum Error { /// Improper UTF-8 bytes were found when proper UTF-8 was expected. The error value contains /// the malformed data as bytes. 
- Utf8EncodingError(Vec), + Utf8EncodingError(Utf8Error), } impl std::fmt::Display for Error { @@ -29,7 +31,7 @@ impl std::fmt::Display for Error { actual, expected ), Self::MalformedValue { message } => write!(f, "malformed value: {:?}", message), - Self::Utf8EncodingError(_) => write!(f, "utf-8 encoding error"), + Self::Utf8EncodingError(e) => write!(f, "utf-8 encoding error: {}", e), } } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index a4520af8..117325b5 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -181,6 +181,6 @@ fn read_lenencoded(buf: &[u8]) -> Result<&str> { fn try_to_str(data: &[u8]) -> Result<&str> { match std::str::from_utf8(data) { Ok(s) => Ok(s), - Err(_) => Err(Error::Utf8EncodingError(data.into())), + Err(e) => Err(Error::Utf8EncodingError(e)), } } From 4eda6dede93e4c462143e2b90adac763143f1c56 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 16:16:45 -0400 Subject: [PATCH 19/48] rename into_inner -> into_vec --- src/raw/doc.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 8f5917f1..ceebb7a6 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -159,9 +159,9 @@ impl RawDocument { /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc!{}); - /// assert_eq!(doc.into_inner(), b"\x05\x00\x00\x00\x00".to_vec()); + /// assert_eq!(doc.into_vec(), b"\x05\x00\x00\x00\x00".to_vec()); /// ``` - pub fn into_inner(self) -> Vec { + pub fn into_vec(self) -> Vec { self.data.to_vec() } } From db3f8e7ea813e57e4eb0af03475a8065d7de0bde Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 16:32:55 -0400 Subject: [PATCH 20/48] remove unwrap from `RawDocument::from_document` --- src/raw/doc.rs | 8 +++---- src/raw/test/mod.rs | 51 +++++++++++++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/raw/doc.rs b/src/raw/doc.rs index ceebb7a6..b20401cb 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -117,13 
+117,13 @@ impl RawDocument { /// let doc = RawDocument::from_document(&document); /// # Ok::<(), Error>(()) /// ``` - pub fn from_document(doc: &Document) -> RawDocument { + pub fn from_document(doc: &Document) -> crate::ser::Result { let mut data = Vec::new(); - doc.to_writer(&mut data).unwrap(); + doc.to_writer(&mut data)?; - Self { + Ok(Self { data: data.into_boxed_slice(), - } + }) } /// Gets an iterator over the elements in the `RawDocument`, which yields `Result<&str, diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index 8d928d20..d1ae45dd 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -110,7 +110,7 @@ fn rawdoc_to_doc() { fn f64() { #![allow(clippy::float_cmp)] - let rawdoc = RawDocument::from_document(&doc! {"f64": 2.5}); + let rawdoc = RawDocument::from_document(&doc! { "f64": 2.5 }).unwrap(); assert_eq!( rawdoc .get("f64") @@ -124,7 +124,7 @@ fn f64() { #[test] fn string() { - let rawdoc = RawDocument::from_document(&doc! {"string": "hello"}); + let rawdoc = RawDocument::from_document(&doc! {"string": "hello"}).unwrap(); assert_eq!( rawdoc @@ -139,7 +139,7 @@ fn string() { #[test] fn document() { - let rawdoc = RawDocument::from_document(&doc! {"document": {}}); + let rawdoc = RawDocument::from_document(&doc! {"document": {}}).unwrap(); let doc = rawdoc .get("document") @@ -154,7 +154,8 @@ fn document() { fn array() { let rawdoc = RawDocument::from_document( &doc! { "array": ["binary", "serialized", "object", "notation"]}, - ); + ) + .unwrap(); let array = rawdoc .get("array") @@ -171,7 +172,8 @@ fn array() { fn binary() { let rawdoc = RawDocument::from_document(&doc! { "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] } - }); + }) + .unwrap(); let binary: elem::RawBinary<'_> = rawdoc .get("binary") .expect("error finding key binary") @@ -186,7 +188,8 @@ fn binary() { fn object_id() { let rawdoc = RawDocument::from_document(&doc! 
{ "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), - }); + }) + .unwrap(); let oid = rawdoc .get("object_id") .expect("error finding key object_id") @@ -200,7 +203,8 @@ fn object_id() { fn boolean() { let rawdoc = RawDocument::from_document(&doc! { "boolean": true, - }); + }) + .unwrap(); let boolean = rawdoc .get("boolean") @@ -217,7 +221,8 @@ fn datetime() { let rawdoc = RawDocument::from_document(&doc! { "boolean": true, "datetime": DateTime::from_chrono(Utc.ymd(2000,10,31).and_hms(12, 30, 45)), - }); + }) + .unwrap(); let datetime = rawdoc .get("datetime") .expect("error finding key datetime") @@ -231,7 +236,8 @@ fn datetime() { fn null() { let rawdoc = RawDocument::from_document(&doc! { "null": null, - }); + }) + .unwrap(); let () = rawdoc .get("null") .expect("error finding key null") @@ -244,7 +250,7 @@ fn null() { fn regex() { let rawdoc = RawDocument::from_document(&doc! { "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), - }); + }).unwrap(); let regex = rawdoc .get("regex") .expect("error finding key regex") @@ -258,7 +264,8 @@ fn regex() { fn javascript() { let rawdoc = RawDocument::from_document(&doc! { "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), - }); + }) + .unwrap(); let js = rawdoc .get("javascript") .expect("error finding key javascript") @@ -272,7 +279,8 @@ fn javascript() { fn symbol() { let rawdoc = RawDocument::from_document(&doc! { "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), - }); + }) + .unwrap(); let symbol = rawdoc .get("symbol") @@ -286,8 +294,12 @@ fn symbol() { #[test] fn javascript_with_scope() { let rawdoc = RawDocument::from_document(&doc! 
{ - "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), - }); + "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope { + code: String::from("console.log(msg);"), + scope: doc! { "ok": true } + }), + }) + .unwrap(); let js_with_scope = rawdoc .get("javascript_with_scope") .expect("error finding key javascript_with_scope") @@ -310,7 +322,8 @@ fn javascript_with_scope() { fn int32() { let rawdoc = RawDocument::from_document(&doc! { "int32": 23i32, - }); + }) + .unwrap(); let int32 = rawdoc .get("int32") .expect("error finding key int32") @@ -324,7 +337,8 @@ fn int32() { fn timestamp() { let rawdoc = RawDocument::from_document(&doc! { "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 7 }), - }); + }) + .unwrap(); let ts = rawdoc .get("timestamp") .expect("error finding key timestamp") @@ -340,7 +354,8 @@ fn timestamp() { fn int64() { let rawdoc = RawDocument::from_document(&doc! 
{ "int64": 46i64, - }); + }) + .unwrap(); let int64 = rawdoc .get("int64") .expect("error finding key int64") @@ -370,7 +385,7 @@ fn document_iteration() { "int64": 46i64, "end": "END", }; - let rawdoc = RawDocument::from_document(&doc); + let rawdoc = RawDocument::from_document(&doc).unwrap(); let rawdocref = rawdoc.as_ref(); assert_eq!( From 7dca3a3964f5ecbc8e8d5259f442372e4d7f59f3 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 16:49:50 -0400 Subject: [PATCH 21/48] add Cow From implementations --- src/raw/doc.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/raw/doc.rs b/src/raw/doc.rs index b20401cb..79399337 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -1,5 +1,5 @@ use std::{ - borrow::Borrow, + borrow::{Borrow, Cow}, convert::{TryFrom, TryInto}, ops::Deref, }; @@ -166,6 +166,18 @@ impl RawDocument { } } +impl<'a> From for Cow<'a, RawDocumentRef> { + fn from(rd: RawDocument) -> Self { + Cow::Owned(rd) + } +} + +impl<'a> From<&'a RawDocument> for Cow<'a, RawDocumentRef> { + fn from(rd: &'a RawDocument) -> Self { + Cow::Borrowed(rd.as_ref()) + } +} + impl TryFrom for Document { type Error = Error; @@ -649,6 +661,12 @@ impl Deref for RawDocument { } } +impl<'a> From<&'a RawDocumentRef> for Cow<'a, RawDocumentRef> { + fn from(rdr: &'a RawDocumentRef) -> Self { + Cow::Borrowed(rdr) + } +} + impl TryFrom<&RawDocumentRef> for crate::Document { type Error = Error; From b7073871581b683758df247dceface38aa328f28 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 18:29:32 -0400 Subject: [PATCH 22/48] mark RawArray and RawDocumentRef as repr(transparent) --- src/raw/array.rs | 1 + src/raw/doc.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/src/raw/array.rs b/src/raw/array.rs index 06b0745e..a9e2e1b4 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -13,6 +13,7 @@ use super::{ use crate::{oid::ObjectId, Bson, DateTime}; /// A BSON array referencing raw bytes stored elsewhere. 
+#[repr(transparent)] pub struct RawArray { doc: RawDocumentRef, } diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 79399337..9cdd347b 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -255,6 +255,7 @@ impl ToOwned for RawDocumentRef { /// # Ok::<(), Error>(()) /// ``` #[derive(Debug)] +#[repr(transparent)] pub struct RawDocumentRef { data: [u8], } From 8714cc10dff2b13ed784297020334fcb04e443f6 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 18:37:48 -0400 Subject: [PATCH 23/48] use a vec internally in RawDocument, fix tests --- src/raw/doc.rs | 58 ++++++++++++++++++++++++++------------------------ src/raw/mod.rs | 4 ++-- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 9cdd347b..0e3ddd59 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -56,7 +56,7 @@ use crate::{oid::ObjectId, spec::ElementType, Document}; /// ``` #[derive(Clone, Debug)] pub struct RawDocument { - data: Box<[u8]>, + data: Vec, } impl RawDocument { @@ -98,9 +98,7 @@ impl RawDocument { }); } - Ok(Self { - data: data.into_boxed_slice(), - }) + Ok(Self { data }) } /// Create a RawDocument from a Document. @@ -117,13 +115,14 @@ impl RawDocument { /// let doc = RawDocument::from_document(&document); /// # Ok::<(), Error>(()) /// ``` - pub fn from_document(doc: &Document) -> crate::ser::Result { + pub fn from_document(doc: &Document) -> Result { let mut data = Vec::new(); - doc.to_writer(&mut data)?; + doc.to_writer(&mut data) + .map_err(|e| Error::MalformedValue { + message: e.to_string(), + })?; - Ok(Self { - data: data.into_boxed_slice(), - }) + Ok(Self { data }) } /// Gets an iterator over the elements in the `RawDocument`, which yields `Result<&str, @@ -133,7 +132,7 @@ impl RawDocument { /// # use bson::raw::Error; /// use bson::{doc, raw::RawDocument}; /// - /// let doc = RawDocument::from_document(&doc! { "ferris": true }); + /// let doc = RawDocument::from_document(&doc! 
{ "ferris": true })?; /// /// for element in doc.iter() { /// let (key, value) = element?; @@ -155,11 +154,12 @@ impl RawDocument { /// Return the contained data as a `Vec` /// /// ``` - /// # use bson::raw::RawDocument; - /// use bson::doc; + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocument}; /// - /// let doc = RawDocument::from_document(&doc!{}); + /// let doc = RawDocument::from_document(&doc!{})?; /// assert_eq!(doc.into_vec(), b"\x05\x00\x00\x00\x00".to_vec()); + /// # Ok::<(), Error>(()) /// ``` pub fn into_vec(self) -> Vec { self.data.to_vec() @@ -330,7 +330,7 @@ impl RawDocumentRef { /// # Ok::<(), Error>(()) pub fn to_raw_document(&self) -> RawDocument { RawDocument { - data: self.data.to_owned().into_boxed_slice(), + data: self.data.to_owned(), } } @@ -344,7 +344,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "_id": ObjectId::new(), /// "f64": 2.5, - /// }); + /// })?; /// /// let element = doc.get("f64")?.expect("finding key f64"); /// assert_eq!(element.as_f64(), Ok(2.5)); @@ -380,7 +380,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "bool": true, /// "f64": 2.5, - /// }); + /// })?; /// /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); /// assert!(matches!(doc.get_f64("bool"), Err(Error::UnexpectedType { .. }))); @@ -400,7 +400,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "string": "hello", /// "bool": true, - /// }); + /// })?; /// /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); /// assert!(matches!(doc.get_str("bool"), Err(Error::UnexpectedType { .. }))); @@ -421,7 +421,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "doc": { "key": "value"}, /// "bool": true, - /// }); + /// })?; /// /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); /// assert!(matches!(doc.get_document("bool").unwrap_err(), Error::UnexpectedType { .. 
})); @@ -442,7 +442,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "array": [true, 3], /// "bool": true, - /// }); + /// })?; /// /// let mut arr_iter = doc.get_array("array")?.expect("finding key array").into_iter(); /// let _: bool = arr_iter.next().unwrap()?.as_bool()?; @@ -472,7 +472,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, /// "bool": true, - /// }); + /// })?; /// /// assert_eq!(doc.get_binary("binary")?.map(RawBinary::as_bytes), Some(&[1, 2, 3][..])); /// assert!(matches!(doc.get_binary("bool").unwrap_err(), Error::UnexpectedType { .. })); @@ -493,7 +493,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "_id": ObjectId::new(), /// "bool": true, - /// }); + /// })?; /// /// let oid = doc.get_object_id("_id")?.unwrap(); /// assert!(matches!(doc.get_object_id("bool").unwrap_err(), Error::UnexpectedType { .. })); @@ -514,7 +514,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "_id": ObjectId::new(), /// "bool": true, - /// }); + /// })?; /// /// assert!(doc.get_bool("bool")?.unwrap()); /// assert!(matches!(doc.get_bool("_id").unwrap_err(), Error::UnexpectedType { .. })); @@ -536,7 +536,8 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "created_at": dt, /// "bool": true, - /// }); + /// })?; + /// /// assert_eq!(doc.get_datetime("created_at")?, Some(dt)); /// assert!(matches!(doc.get_datetime("bool").unwrap_err(), Error::UnexpectedType { .. 
})); /// assert!(doc.get_datetime("unknown")?.is_none()); @@ -558,7 +559,7 @@ impl RawDocumentRef { /// options: "i".into(), /// }, /// "bool": true, - /// }); + /// })?; /// /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); @@ -579,7 +580,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "bool": true, /// "ts": Timestamp { time: 649876543, increment: 9 }, - /// }); + /// })?; /// /// let timestamp = doc.get_timestamp("ts")?.unwrap(); /// @@ -603,7 +604,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "bool": true, /// "i32": 1_000_000, - /// }); + /// })?; /// /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); /// assert!(matches!(doc.get_i32("bool"), Err(Error::UnexpectedType { .. }))); @@ -624,7 +625,7 @@ impl RawDocumentRef { /// let doc = RawDocument::from_document(&doc! { /// "bool": true, /// "i64": 9223372036854775807_i64, - /// }); + /// })?; /// /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); /// assert!(matches!(doc.get_i64("bool"), Err(Error::UnexpectedType { .. }))); @@ -640,8 +641,9 @@ impl RawDocumentRef { /// ``` /// # use bson::raw::Error; /// use bson::{doc, raw::RawDocument}; - /// let docbuf = RawDocument::from_document(&doc!{}); + /// let docbuf = RawDocument::from_document(&doc!{})?; /// assert_eq!(docbuf.as_bytes(), b"\x05\x00\x00\x00\x00"); + /// # Ok::<(), Error>(()) /// ``` pub fn as_bytes(&self) -> &[u8] { &self.data diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 117325b5..20ae81a3 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -37,7 +37,7 @@ //! } //! }; -//! let raw = RawDocument::from_document(&document); +//! let raw = RawDocument::from_document(&document)?; //! let value: Option<&str> = raw //! .get_document("goodbye")? //! .map(|doc| doc.get_str("cruel")) @@ -87,7 +87,7 @@ //! "year": "2021", //! }; //! -//! 
let doc = RawDocument::from_document(&original_doc); +//! let doc = RawDocument::from_document(&original_doc)?; //! let mut doc_iter = doc.iter(); //! //! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?; From ffe045331af65428c7e788f0f486d01989047077 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 6 Oct 2021 18:45:19 -0400 Subject: [PATCH 24/48] rename RawDocumentIter to Iter, add docstring --- src/raw/array.rs | 13 ++----------- src/raw/doc.rs | 21 +++++++++++---------- src/raw/mod.rs | 2 +- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index a9e2e1b4..fb1af795 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -1,15 +1,6 @@ use std::convert::TryFrom; -use super::{ - Error, - RawBinary, - RawBson, - RawDocumentIter, - RawDocumentRef, - RawRegex, - RawTimestamp, - Result, -}; +use super::{Error, Iter, RawBinary, RawBson, RawDocumentRef, RawRegex, RawTimestamp, Result}; use crate::{oid::ObjectId, Bson, DateTime}; /// A BSON array referencing raw bytes stored elsewhere. @@ -153,7 +144,7 @@ impl<'a> IntoIterator for &'a RawArray { /// An iterator over borrowed raw BSON array values. pub struct RawArrayIter<'a> { - inner: RawDocumentIter<'a>, + inner: Iter<'a>, } impl<'a> Iterator for RawArrayIter<'a> { diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 0e3ddd59..179bbfd6 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -147,7 +147,7 @@ impl RawDocument { /// There is no owning iterator for RawDocument. If you need ownership over /// elements that might need to allocate, you must explicitly convert /// them to owned types yourself. 
- pub fn iter(&self) -> RawDocumentIter<'_> { + pub fn iter(&self) -> Iter<'_> { self.into_iter() } @@ -187,11 +187,11 @@ impl TryFrom for Document { } impl<'a> IntoIterator for &'a RawDocument { - type IntoIter = RawDocumentIter<'a>; + type IntoIter = Iter<'a>; type Item = Result<(&'a str, RawBson<'a>)>; - fn into_iter(self) -> RawDocumentIter<'a> { - RawDocumentIter { + fn into_iter(self) -> Iter<'a> { + Iter { doc: &self, offset: 4, } @@ -682,23 +682,24 @@ impl TryFrom<&RawDocumentRef> for crate::Document { } impl<'a> IntoIterator for &'a RawDocumentRef { - type IntoIter = RawDocumentIter<'a>; + type IntoIter = Iter<'a>; type Item = Result<(&'a str, RawBson<'a>)>; - fn into_iter(self) -> RawDocumentIter<'a> { - RawDocumentIter { + fn into_iter(self) -> Iter<'a> { + Iter { doc: self, offset: 4, } } } -pub struct RawDocumentIter<'a> { +/// An iterator over the document's entries. +pub struct Iter<'a> { doc: &'a RawDocumentRef, offset: usize, } -impl<'a> Iterator for RawDocumentIter<'a> { +impl<'a> Iterator for Iter<'a> { type Item = Result<(&'a str, RawBson<'a>)>; fn next(&mut self) -> Option)>> { @@ -714,7 +715,7 @@ impl<'a> Iterator for RawDocumentIter<'a> { } // helper function to ease the use of the `?` operator - fn read_next<'a>(iter: &mut RawDocumentIter<'a>) -> Result<(&'a str, RawBson<'a>)> { + fn read_next<'a>(iter: &mut Iter<'a>) -> Result<(&'a str, RawBson<'a>)> { let key = read_nullterminated(&iter.doc.data[iter.offset + 1..])?; let valueoffset = iter.offset + 1 + key.len() + 1; // type specifier + key + \0 diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 20ae81a3..09aa522c 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -111,7 +111,7 @@ use std::convert::TryInto; pub use self::{ array::{RawArray, RawArrayIter}, - doc::{RawDocument, RawDocumentIter, RawDocumentRef}, + doc::{Iter, RawDocument, RawDocumentRef}, elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex, RawTimestamp}, error::{Error, Result}, }; From 
7f641d8d243839ba7be886964558447c7e3c639d Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 7 Oct 2021 18:46:07 -0400 Subject: [PATCH 25/48] store the key in the error, reduce possibility of panics --- src/raw/doc.rs | 262 +++++++++++++++++++++++++---------------------- src/raw/elem.rs | 64 +++++++----- src/raw/error.rs | 60 +++++++++-- src/raw/mod.rs | 97 +++++++++++------- 4 files changed, 293 insertions(+), 190 deletions(-) diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 179bbfd6..7747f949 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -1,10 +1,13 @@ use std::{ borrow::{Borrow, Cow}, convert::{TryFrom, TryInto}, - ops::Deref, + ops::{Deref, Range}, }; -use crate::DateTime; +use crate::{ + raw::error::{try_with_key, ErrorKind}, + DateTime, +}; use super::{ i32_from_slice, @@ -79,23 +82,23 @@ impl RawDocument { /// ``` pub fn new(data: Vec) -> Result { if data.len() < 5 { - return Err(Error::MalformedValue { + return Err(Error::new_without_key(ErrorKind::MalformedValue { message: "document too short".into(), - }); + })); } - let length = i32_from_slice(&data[..4])?; + let length = i32_from_slice(&data)?; if data.len() as i32 != length { - return Err(Error::MalformedValue { + return Err(Error::new_without_key(ErrorKind::MalformedValue { message: "document length incorrect".into(), - }); + })); } if data[data.len() - 1] != 0 { - return Err(Error::MalformedValue { + return Err(Error::new_without_key(ErrorKind::MalformedValue { message: "document not null-terminated".into(), - }); + })); } Ok(Self { data }) @@ -117,10 +120,12 @@ impl RawDocument { /// ``` pub fn from_document(doc: &Document) -> Result { let mut data = Vec::new(); - doc.to_writer(&mut data) - .map_err(|e| Error::MalformedValue { + doc.to_writer(&mut data).map_err(|e| Error { + key: None, + kind: ErrorKind::MalformedValue { message: e.to_string(), - })?; + }, + })?; Ok(Self { data }) } @@ -283,22 +288,31 @@ impl RawDocumentRef { let data = data.as_ref(); if data.len() < 5 { - return 
Err(Error::MalformedValue { - message: "document too short".into(), + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "document too short".into(), + }, }); } - let length = i32_from_slice(&data[..4])?; + let length = i32_from_slice(&data)?; if data.len() as i32 != length { - return Err(Error::MalformedValue { - message: "document length incorrect".into(), + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "document length incorrect".into(), + }, }); } if data[data.len() - 1] != 0 { - return Err(Error::MalformedValue { - message: "document not null-terminated".into(), + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "document not null-terminated".into(), + }, }); } @@ -374,7 +388,7 @@ impl RawDocumentRef { /// /// ``` /// # use bson::raw::Error; - /// use bson::raw::RawDocument; + /// use bson::raw::{ErrorKind, RawDocument}; /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc! { @@ -383,7 +397,7 @@ impl RawDocumentRef { /// })?; /// /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); - /// assert!(matches!(doc.get_f64("bool"), Err(Error::UnexpectedType { .. }))); + /// assert!(matches!(doc.get_f64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); /// assert_eq!(doc.get_f64("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` @@ -395,7 +409,8 @@ impl RawDocumentRef { /// key corresponds to a value which isn't a string. /// /// ``` - /// use bson::{doc, raw::{RawDocument, Error}}; + /// # use bson::raw::Error; + /// use bson::{doc, raw::{RawDocument, ErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! { /// "string": "hello", @@ -403,7 +418,7 @@ impl RawDocumentRef { /// })?; /// /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); - /// assert!(matches!(doc.get_str("bool"), Err(Error::UnexpectedType { .. }))); + /// assert!(matches!(doc.get_str("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. 
})); /// assert_eq!(doc.get_str("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` @@ -415,8 +430,8 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a document. /// /// ``` - /// use bson::raw::Error; - /// use bson::{doc, raw::RawDocument}; + /// # use bson::raw::Error; + /// use bson::{doc, raw::{ErrorKind, RawDocument}}; /// /// let doc = RawDocument::from_document(&doc! { /// "doc": { "key": "value"}, @@ -424,7 +439,7 @@ impl RawDocumentRef { /// })?; /// /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); - /// assert!(matches!(doc.get_document("bool").unwrap_err(), Error::UnexpectedType { .. })); + /// assert!(matches!(doc.get_document("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); /// assert!(doc.get_document("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -464,7 +479,7 @@ impl RawDocumentRef { /// # use bson::raw::Error; /// use bson::{ /// doc, - /// raw::{RawDocument, RawBinary}, + /// raw::{ErrorKind, RawDocument, RawBinary}, /// spec::BinarySubtype, /// Binary, /// }; @@ -475,7 +490,7 @@ impl RawDocumentRef { /// })?; /// /// assert_eq!(doc.get_binary("binary")?.map(RawBinary::as_bytes), Some(&[1, 2, 3][..])); - /// assert!(matches!(doc.get_binary("bool").unwrap_err(), Error::UnexpectedType { .. })); + /// assert!(matches!(doc.get_binary("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); /// assert!(doc.get_binary("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -488,7 +503,7 @@ impl RawDocumentRef { /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, oid::ObjectId, raw::RawDocument}; + /// use bson::{doc, oid::ObjectId, raw::{ErrorKind, RawDocument}}; /// /// let doc = RawDocument::from_document(&doc! 
{ /// "_id": ObjectId::new(), @@ -496,7 +511,7 @@ impl RawDocumentRef { /// })?; /// /// let oid = doc.get_object_id("_id")?.unwrap(); - /// assert!(matches!(doc.get_object_id("bool").unwrap_err(), Error::UnexpectedType { .. })); + /// assert!(matches!(doc.get_object_id("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); /// assert!(doc.get_object_id("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -508,8 +523,8 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a boolean. /// /// ``` - /// # use bson::raw::{RawDocument, Error}; - /// use bson::{doc, oid::ObjectId}; + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::{RawDocument, ErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! { /// "_id": ObjectId::new(), @@ -517,7 +532,7 @@ impl RawDocumentRef { /// })?; /// /// assert!(doc.get_bool("bool")?.unwrap()); - /// assert!(matches!(doc.get_bool("_id").unwrap_err(), Error::UnexpectedType { .. })); + /// assert!(matches!(doc.get_bool("_id").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); /// assert!(doc.get_object_id("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -530,7 +545,7 @@ impl RawDocumentRef { /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, raw::RawDocument, DateTime}; + /// use bson::{doc, raw::{ErrorKind, RawDocument}, DateTime}; /// /// let dt = DateTime::now(); /// let doc = RawDocument::from_document(&doc! { @@ -539,7 +554,7 @@ impl RawDocumentRef { /// })?; /// /// assert_eq!(doc.get_datetime("created_at")?, Some(dt)); - /// assert!(matches!(doc.get_datetime("bool").unwrap_err(), Error::UnexpectedType { .. })); + /// assert!(matches!(doc.get_datetime("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); /// assert!(doc.get_datetime("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -551,7 +566,8 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a regex. 
/// /// ``` - /// use bson::{doc, Regex, raw::{RawDocument, Error}}; + /// # use bson::raw::Error; + /// use bson::{doc, Regex, raw::{RawDocument, ErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! { /// "regex": Regex { @@ -563,7 +579,7 @@ impl RawDocumentRef { /// /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); - /// assert!(matches!(doc.get_regex("bool").unwrap_err(), Error::UnexpectedType { .. })); + /// assert!(matches!(doc.get_regex("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); /// assert!(doc.get_regex("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -575,7 +591,8 @@ impl RawDocumentRef { /// error if the key corresponds to a value which isn't a timestamp. /// /// ``` - /// use bson::{doc, Timestamp, raw::{RawDocument, Error}}; + /// # use bson::raw::Error; + /// use bson::{doc, Timestamp, raw::{RawDocument, ErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! { /// "bool": true, @@ -586,7 +603,7 @@ impl RawDocumentRef { /// /// assert_eq!(timestamp.time(), 649876543); /// assert_eq!(timestamp.increment(), 9); - /// assert!(matches!(doc.get_timestamp("bool"), Err(Error::UnexpectedType { .. }))); + /// assert!(matches!(doc.get_timestamp("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` @@ -598,8 +615,8 @@ impl RawDocumentRef { /// the key corresponds to a value which isn't a 32-bit integer. /// /// ``` - /// # use bson::raw::{RawDocument, Error}; - /// use bson::doc; + /// # use bson::raw::Error; + /// use bson::{doc, raw::{RawDocument, ErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! { /// "bool": true, @@ -607,7 +624,7 @@ impl RawDocumentRef { /// })?; /// /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); - /// assert!(matches!(doc.get_i32("bool"), Err(Error::UnexpectedType { .. 
}))); + /// assert!(matches!(doc.get_i32("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); /// assert_eq!(doc.get_i32("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` @@ -620,7 +637,7 @@ impl RawDocumentRef { /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, raw::RawDocument}; + /// use bson::{doc, raw::{ErrorKind, RawDocument}}; /// /// let doc = RawDocument::from_document(&doc! { /// "bool": true, @@ -628,7 +645,7 @@ impl RawDocumentRef { /// })?; /// /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); - /// assert!(matches!(doc.get_i64("bool"), Err(Error::UnexpectedType { .. }))); + /// assert!(matches!(doc.get_i64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); /// assert_eq!(doc.get_i64("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` @@ -699,6 +716,43 @@ pub struct Iter<'a> { offset: usize, } +impl<'a> Iter<'a> { + fn verify_null_terminated(&self, range: Range) -> Result<()> { + if range.is_empty() { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "value has empty range".to_string(), + })); + } + + self.verify_in_range(range.clone())?; + if self.doc.data[range.end - 1] == 0 { + return Ok(()); + } else { + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "not null terminated".into(), + }, + }); + } + } + + fn verify_in_range(&self, range: Range) -> Result<()> { + let start = range.start; + let len = range.len(); + if self.doc.data.get(range).is_none() { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: format!( + "length exceeds remaining length of buffer: {} vs {}", + len, + self.doc.data.len() - start + ), + })); + } + Ok(()) + } +} + impl<'a> Iterator for Iter<'a> { type Item = Result<(&'a str, RawBson<'a>)>; @@ -708,81 +762,69 @@ impl<'a> Iterator for Iter<'a> { // end of document marker return None; } else { - return Some(Err(Error::MalformedValue { - message: "document not null terminated".into(), + 
return Some(Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "document not null terminated".into(), + }, })); } + } else if self.offset >= self.doc.data.len() { + // return None on subsequent iterations after an error + return None; } - // helper function to ease the use of the `?` operator - fn read_next<'a>(iter: &mut Iter<'a>) -> Result<(&'a str, RawBson<'a>)> { - let key = read_nullterminated(&iter.doc.data[iter.offset + 1..])?; + let key = match read_nullterminated(&self.doc.data[self.offset + 1..]) { + Ok(k) => k, + Err(e) => return Some(Err(e)), + }; - let valueoffset = iter.offset + 1 + key.len() + 1; // type specifier + key + \0 + let kvp_result = try_with_key(key, || { + let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 - let element_type = match ElementType::from(iter.doc.data[iter.offset]) { + let element_type = match ElementType::from(self.doc.data[self.offset]) { Some(et) => et, None => { - return Err(Error::MalformedValue { - message: format!("invalid tag: {}", iter.doc.data[iter.offset]), - }) + return Err(Error::new_with_key( + key, + ErrorKind::MalformedValue { + message: format!("invalid tag: {}", self.doc.data[self.offset]), + }, + )) } }; let element_size = match element_type { ElementType::Double => 8, ElementType::String => { - let size = - 4 + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize; - - if iter.doc.data[valueoffset + size - 1] != 0 { - return Err(Error::MalformedValue { - message: "string not null terminated".into(), - }); - } - + let size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize; + self.verify_null_terminated(valueoffset..(valueoffset + size))?; size } ElementType::EmbeddedDocument => { - let size = - i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? 
as usize; - - if iter.doc.data[valueoffset + size - 1] != 0 { - return Err(Error::MalformedValue { - message: "document not null terminated".into(), - }); - } - + let size = i32_from_slice(&self.doc.data[valueoffset..])? as usize; + self.verify_null_terminated(valueoffset..(valueoffset + size))?; size } ElementType::Array => { - let size = - i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize; - - if iter.doc.data[valueoffset + size - 1] != 0 { - return Err(Error::MalformedValue { - message: "array not null terminated".into(), - }); - } - + let size = i32_from_slice(&self.doc.data[valueoffset..])? as usize; + self.verify_null_terminated(valueoffset..(valueoffset + size))?; size } - ElementType::Binary => { - 5 + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize - } + ElementType::Binary => 5 + i32_from_slice(&self.doc.data[valueoffset..])? as usize, ElementType::Undefined => 0, ElementType::ObjectId => 12, ElementType::Boolean => 1, ElementType::DateTime => 8, ElementType::Null => 0, ElementType::RegularExpression => { - let regex = match read_nullterminated(&iter.doc.data[valueoffset..]) { + let regex = match read_nullterminated(&self.doc.data[valueoffset..]) { Ok(regex) => regex, Err(err) => return Err(err), }; let options = match read_nullterminated( - &iter.doc.data[valueoffset + regex.len() + 1..], + &self.doc.data[valueoffset + regex.len() + 1..], ) { Ok(options) => options, Err(err) => return Err(err), @@ -791,44 +833,20 @@ impl<'a> Iterator for Iter<'a> { regex.len() + options.len() + 2 } ElementType::DbPointer => { - let string_size = - 4 + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize; - + let string_size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? 
as usize; let id_size = 12; - - if iter.doc.data[valueoffset + string_size - 1] != 0 { - return Err(Error::MalformedValue { - message: "DBPointer string not null-terminated".into(), - }); - } - + self.verify_null_terminated(valueoffset..(valueoffset + string_size))?; string_size + id_size } ElementType::JavaScriptCode => { - let size = - 4 + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize; - - if iter.doc.data[valueoffset + size - 1] != 0 { - return Err(Error::MalformedValue { - message: "javascript code not null-terminated".into(), - }); - } - + let size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize; + self.verify_null_terminated(valueoffset..(valueoffset + size))?; size } - ElementType::Symbol => { - 4 + i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize - } + ElementType::Symbol => 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize, ElementType::JavaScriptCodeWithScope => { - let size = - i32_from_slice(&iter.doc.data[valueoffset..valueoffset + 4])? as usize; - - if iter.doc.data[valueoffset + size - 1] != 0 { - return Err(Error::MalformedValue { - message: "javascript with scope not null-terminated".into(), - }); - } - + let size = i32_from_slice(&self.doc.data[valueoffset..])? 
as usize; + self.verify_null_terminated(valueoffset..(valueoffset + size))?; size } ElementType::Int32 => 4, @@ -840,14 +858,16 @@ impl<'a> Iterator for Iter<'a> { }; let nextoffset = valueoffset + element_size; - iter.offset = nextoffset; + self.offset = nextoffset; + + self.verify_in_range(valueoffset..nextoffset)?; Ok(( key, - RawBson::new(element_type, &iter.doc.data[valueoffset..nextoffset]), + RawBson::new(element_type, &self.doc.data[valueoffset..nextoffset]), )) - } + }); - Some(read_next(self)) + Some(kvp_result) } } diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 45c45f6e..a52716eb 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -6,6 +6,7 @@ use crate::{de::read_bool, DateTime, Decimal128}; #[cfg(feature = "decimal128")] use super::d128_from_slice; use super::{ + error::ErrorKind, i32_from_slice, i64_from_slice, read_lenencoded, @@ -46,9 +47,12 @@ impl<'a> RawBson<'a> { fn validate_type(self, expected: ElementType) -> Result<()> { if self.element_type != expected { - return Err(Error::UnexpectedType { - actual: self.element_type, - expected, + return Err(Error { + key: None, + kind: ErrorKind::UnexpectedType { + actual: self.element_type, + expected, + }, }); } Ok(()) @@ -58,8 +62,11 @@ impl<'a> RawBson<'a> { pub fn as_f64(self) -> Result { self.validate_type(ElementType::Double)?; Ok(f64::from_bits(u64::from_le_bytes( - self.data.try_into().map_err(|_| Error::MalformedValue { - message: "f64 should be 8 bytes long".into(), + self.data.try_into().map_err(|_| Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "f64 should be 8 bytes long".into(), + }, })?, ))) } @@ -89,22 +96,25 @@ impl<'a> RawBson<'a> { let length = i32_from_slice(&self.data[0..4])?; let subtype = BinarySubtype::from(self.data[4]); if self.data.len() as i32 != length + 5 { - return Err(Error::MalformedValue { - message: "binary bson has wrong declared length".into(), + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "binary bson 
has wrong declared length".into(), + }, }); } let data = match subtype { BinarySubtype::BinaryOld => { if length < 4 { - return Err(Error::MalformedValue { + return Err(Error::new_without_key(ErrorKind::MalformedValue { message: "old binary subtype has no inner declared length".into(), - }); + })); } let oldlength = i32_from_slice(&self.data[5..9])?; if oldlength + 4 != length { - return Err(Error::MalformedValue { + return Err(Error::new_without_key(ErrorKind::MalformedValue { message: "old binary subtype has wrong inner declared length".into(), - }); + })); } &self.data[9..] } @@ -117,8 +127,10 @@ impl<'a> RawBson<'a> { pub fn as_object_id(self) -> Result { self.validate_type(ElementType::ObjectId)?; Ok(ObjectId::from_bytes(self.data.try_into().map_err( - |_| Error::MalformedValue { - message: "object id should be 12 bytes long".into(), + |_| { + Error::new_without_key(ErrorKind::MalformedValue { + message: "object id should be 12 bytes long".into(), + }) }, )?)) } @@ -127,12 +139,14 @@ impl<'a> RawBson<'a> { pub fn as_bool(self) -> Result { self.validate_type(ElementType::Boolean)?; if self.data.len() != 1 { - Err(Error::MalformedValue { + Err(Error::new_without_key(ErrorKind::MalformedValue { message: "boolean has length != 1".into(), - }) + })) } else { - read_bool(self.data).map_err(|e| Error::MalformedValue { - message: e.to_string(), + read_bool(self.data).map_err(|e| { + Error::new_without_key(ErrorKind::MalformedValue { + message: e.to_string(), + }) }) } } @@ -170,9 +184,9 @@ impl<'a> RawBson<'a> { let length = i32_from_slice(&self.data[..4])?; if (self.data.len() as i32) != length { - return Err(Error::MalformedValue { - message: "".to_string(), - }); + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: format!("TODO: Java"), + })); } let code = read_lenencoded(&self.data[4..])?; @@ -204,8 +218,10 @@ impl<'a> RawBson<'a> { /// Gets the decimal that's referenced or returns an error if the value isn't a BSON Decimal128. 
pub fn as_decimal128(self) -> Result { self.validate_type(ElementType::Decimal128)?; - let bytes: [u8; 128 / 8] = self.data.try_into().map_err(|_| Error::MalformedValue { - message: format!("decimal128 value has invalid length: {}", self.data.len()), + let bytes: [u8; 128 / 8] = self.data.try_into().map_err(|_| { + Error::new_without_key(ErrorKind::MalformedValue { + message: format!("decimal128 value has invalid length: {}", self.data.len()), + }) })?; Ok(Decimal128::from_bytes(bytes)) } @@ -323,9 +339,9 @@ impl<'a> RawRegex<'a> { options: opts, }) } else { - Err(Error::MalformedValue { + Err(Error::new_without_key(ErrorKind::MalformedValue { message: "expected two null-terminated strings".into(), - }) + })) } } diff --git a/src/raw/error.rs b/src/raw/error.rs index f2cf70ad..9be2d279 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -3,9 +3,44 @@ use std::str::Utf8Error; use crate::spec::ElementType; /// An error that occurs when attempting to parse raw BSON bytes. -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] #[non_exhaustive] -pub enum Error { +pub struct Error { + /// The type of error that was encountered. + pub kind: ErrorKind, + + /// They key associated with the error, if any. + pub key: Option, +} + +impl Error { + pub(crate) fn new_with_key(key: impl AsRef, kind: ErrorKind) -> Self { + Self { + kind, + key: Some(key.as_ref().to_string()), + } + } + + pub(crate) fn new_without_key(kind: ErrorKind) -> Self { + Self { key: None, kind } + } + + pub(crate) fn with_key(mut self, key: impl AsRef) -> Self { + self.key = Some(key.as_ref().to_string()); + self + } +} + +/// Execute the provided closure, mapping the key of the returned error (if any) to the provided +/// key. +pub(crate) fn try_with_key Result>(key: impl AsRef, f: F) -> Result { + f().map_err(|e| e.with_key(key)) +} + +/// The different categories of errors that can be returned when reading from raw BSON. 
+#[derive(Clone, Debug, PartialEq)] +#[non_exhaustive] +pub enum ErrorKind { /// A BSON value did not fit the expected type. #[non_exhaustive] UnexpectedType { @@ -24,14 +59,23 @@ pub enum Error { impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - Self::UnexpectedType { actual, expected } => write!( + let p = self + .key + .as_ref() + .map(|k| format!("error at key \"{}\": ", k)); + + let prefix = p.as_ref().map_or("", |p| p.as_str()); + + match &self.kind { + ErrorKind::UnexpectedType { actual, expected } => write!( f, - "unexpected element type: {:?}, expected: {:?}", - actual, expected + "{} unexpected element type: {:?}, expected: {:?}", + prefix, actual, expected ), - Self::MalformedValue { message } => write!(f, "malformed value: {:?}", message), - Self::Utf8EncodingError(e) => write!(f, "utf-8 encoding error: {}", e), + ErrorKind::MalformedValue { message } => { + write!(f, "{}malformed value: {:?}", prefix, message) + } + ErrorKind::Utf8EncodingError(e) => write!(f, "{}utf-8 encoding error: {}", prefix, e), } } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 09aa522c..58069d35 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -113,53 +113,70 @@ pub use self::{ array::{RawArray, RawArrayIter}, doc::{Iter, RawDocument, RawDocumentRef}, elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex, RawTimestamp}, - error::{Error, Result}, + error::{Error, ErrorKind, Result}, }; -/// Given a 4 byte u8 slice, return an i32 calculated from the bytes in -/// little endian order -/// -/// # Panics -/// -/// This function panics if given a slice that is not four bytes long. +/// Given a u8 slice, return an i32 calculated from the first four bytes in +/// little endian order. 
fn i32_from_slice(val: &[u8]) -> Result { - Ok(i32::from_le_bytes(val.try_into().map_err(|_| { - Error::MalformedValue { - message: format!("expected 4 bytes to read i32, instead got {}", val.len()), - } - })?)) + let arr = val + .get(0..4) + .and_then(|s| s.try_into().ok()) + .ok_or_else(|| Error { + key: None, + kind: ErrorKind::MalformedValue { + message: format!("expected 4 bytes to read i32, instead got {}", val.len()), + }, + })?; + Ok(i32::from_le_bytes(arr)) } -/// Given an 8 byte u8 slice, return an i64 calculated from the bytes in -/// little endian order +/// Given an u8 slice, return an i64 calculated from the first 8 bytes in +/// little endian order. fn i64_from_slice(val: &[u8]) -> Result { - Ok(i64::from_le_bytes(val.try_into().map_err(|_| { - Error::MalformedValue { - message: format!("expected 8 bytes to read i64, instead got {}", val.len()), - } - })?)) + let arr = val + .get(0..8) + .and_then(|s| s.try_into().ok()) + .ok_or_else(|| Error { + key: None, + kind: ErrorKind::MalformedValue { + message: format!("expected 8 bytes to read i64, instead got {}", val.len()), + }, + })?; + Ok(i64::from_le_bytes(arr)) } -/// Given a 4 byte u8 slice, return a u32 calculated from the bytes in -/// little endian order +/// Given a 4 byte u8 slice, return a u32 calculated from the first 4 bytes in +/// little endian order. 
fn u32_from_slice(val: &[u8]) -> Result { - Ok(u32::from_le_bytes(val.try_into().map_err(|_| { - Error::MalformedValue { - message: format!("expected 4 bytes to read u32, instead got {}", val.len()), - } - })?)) + let arr = val + .get(0..4) + .and_then(|s| s.try_into().ok()) + .ok_or_else(|| Error { + key: None, + kind: ErrorKind::MalformedValue { + message: format!("expected 4 bytes to read u32, instead got {}", val.len()), + }, + })?; + Ok(u32::from_le_bytes(arr)) } fn read_nullterminated(buf: &[u8]) -> Result<&str> { let mut splits = buf.splitn(2, |x| *x == 0); - let value = splits.next().ok_or_else(|| Error::MalformedValue { - message: "no value".into(), + let value = splits.next().ok_or_else(|| Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "no value".into(), + }, })?; if splits.next().is_some() { Ok(try_to_str(value)?) } else { - Err(Error::MalformedValue { - message: "expected null terminator".into(), + Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "expected null terminator".into(), + }, }) } } @@ -167,12 +184,15 @@ fn read_nullterminated(buf: &[u8]) -> Result<&str> { fn read_lenencoded(buf: &[u8]) -> Result<&str> { let length = i32_from_slice(&buf[..4])?; if (buf.len() as i32) < length + 4 { - return Err(Error::MalformedValue { - message: format!( - "expected buffer to contain at least {} bytes, but it only has {}", - length + 4, - buf.len() - ), + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: format!( + "expected buffer to contain at least {} bytes, but it only has {}", + length + 4, + buf.len() + ), + }, }); } try_to_str(&buf[4..4 + length as usize - 1]) @@ -181,6 +201,9 @@ fn read_lenencoded(buf: &[u8]) -> Result<&str> { fn try_to_str(data: &[u8]) -> Result<&str> { match std::str::from_utf8(data) { Ok(s) => Ok(s), - Err(e) => Err(Error::Utf8EncodingError(e)), + Err(e) => Err(Error { + key: None, + kind: ErrorKind::Utf8EncodingError(e), + }), } } From 
308e6d49325e4ed891816d3e8cc9f27b88ab6658 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 7 Oct 2021 19:36:37 -0400 Subject: [PATCH 26/48] wip struct -> enum --- src/decimal128.rs | 2 +- src/raw/array.rs | 8 +- src/raw/doc.rs | 689 ++++++++++++++++++++++---------------------- src/raw/elem.rs | 577 +++++++++++++++++++++++-------------- src/raw/test/mod.rs | 6 +- 5 files changed, 727 insertions(+), 555 deletions(-) diff --git a/src/decimal128.rs b/src/decimal128.rs index e4f343c0..c217bb5d 100644 --- a/src/decimal128.rs +++ b/src/decimal128.rs @@ -6,7 +6,7 @@ use std::fmt; /// /// Currently, this type can only be used to round-trip through BSON. See /// [RUST-36](https://jira.mongodb.org/browse/RUST-36) to track the progress towards a complete implementation. -#[derive(Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq)] pub struct Decimal128 { /// BSON bytes containing the decimal128. Stored for round tripping. pub(crate) bytes: [u8; 128 / 8], diff --git a/src/raw/array.rs b/src/raw/array.rs index fb1af795..50db8968 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -1,7 +1,7 @@ use std::convert::TryFrom; use super::{Error, Iter, RawBinary, RawBson, RawDocumentRef, RawRegex, RawTimestamp, Result}; -use crate::{oid::ObjectId, Bson, DateTime}; +use crate::{oid::ObjectId, Bson, DateTime, Timestamp}; /// A BSON array referencing raw bytes stored elsewhere. #[repr(transparent)] @@ -35,9 +35,9 @@ impl RawArray { fn get_with<'a, T>( &'a self, index: usize, - f: impl FnOnce(RawBson<'a>) -> Result, + f: impl FnOnce(RawBson<'a>) -> Option, ) -> Result> { - self.get(index)?.map(f).transpose() + Ok(self.get(index)?.and_then(f)) } /// Gets the BSON double at the given index or returns an error if the value at that index isn't @@ -96,7 +96,7 @@ impl RawArray { /// Gets a reference to the BSON timestamp at the given index or returns an error if the /// value at that index isn't a timestamp. 
- pub fn get_timestamp(&self, index: usize) -> Result>> { + pub fn get_timestamp(&self, index: usize) -> Result> { self.get_with(index, RawBson::as_timestamp) } diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 7747f949..2191ca04 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -11,6 +11,7 @@ use crate::{ use super::{ i32_from_slice, + read_lenencoded, read_nullterminated, Error, RawArray, @@ -375,283 +376,283 @@ impl RawDocumentRef { Ok(None) } - fn get_with<'a, T>( - &'a self, - key: &str, - f: impl FnOnce(RawBson<'a>) -> Result, - ) -> Result> { - self.get(key)?.map(f).transpose() - } - - /// Gets a reference to the BSON double value corresponding to a given key or returns an error - /// if the key corresponds to a value which isn't a double. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::raw::{ErrorKind, RawDocument}; - /// use bson::doc; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "bool": true, - /// "f64": 2.5, - /// })?; - /// - /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); - /// assert!(matches!(doc.get_f64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert_eq!(doc.get_f64("unknown"), Ok(None)); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_f64(&self, key: &str) -> Result> { - self.get_with(key, RawBson::as_f64) - } - - /// Gets a reference to the string value corresponding to a given key or returns an error if the - /// key corresponds to a value which isn't a string. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::{RawDocument, ErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "string": "hello", - /// "bool": true, - /// })?; - /// - /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); - /// assert!(matches!(doc.get_str("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. 
})); - /// assert_eq!(doc.get_str("unknown"), Ok(None)); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_str<'a>(&'a self, key: &str) -> Result> { - self.get_with(key, RawBson::as_str) - } - - /// Gets a reference to the document value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a document. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::{ErrorKind, RawDocument}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "doc": { "key": "value"}, - /// "bool": true, - /// })?; - /// - /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); - /// assert!(matches!(doc.get_document("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert!(doc.get_document("unknown")?.is_none()); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_document<'a>(&'a self, key: &str) -> Result> { - self.get_with(key, RawBson::as_document) - } - - /// Gets a reference to the array value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't an array. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::RawDocument}; - /// - /// let doc = RawDocument::from_document(&doc! 
{ - /// "array": [true, 3], - /// "bool": true, - /// })?; - /// - /// let mut arr_iter = doc.get_array("array")?.expect("finding key array").into_iter(); - /// let _: bool = arr_iter.next().unwrap()?.as_bool()?; - /// let _: i32 = arr_iter.next().unwrap()?.as_i32()?; - /// - /// assert!(arr_iter.next().is_none()); - /// assert!(doc.get_array("bool").is_err()); - /// assert!(doc.get_array("unknown")?.is_none()); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_array<'a>(&'a self, key: &str) -> Result> { - self.get_with(key, RawBson::as_array) - } - - /// Gets a reference to the BSON binary value corresponding to a given key or returns an error - /// if the key corresponds to a value which isn't a binary value. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{ - /// doc, - /// raw::{ErrorKind, RawDocument, RawBinary}, - /// spec::BinarySubtype, - /// Binary, - /// }; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, - /// "bool": true, - /// })?; - /// - /// assert_eq!(doc.get_binary("binary")?.map(RawBinary::as_bytes), Some(&[1, 2, 3][..])); - /// assert!(matches!(doc.get_binary("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert!(doc.get_binary("unknown")?.is_none()); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_binary<'a>(&'a self, key: &str) -> Result>> { - self.get_with(key, RawBson::as_binary) - } - - /// Gets a reference to the ObjectId value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't an ObjectId. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, oid::ObjectId, raw::{ErrorKind, RawDocument}}; - /// - /// let doc = RawDocument::from_document(&doc! 
{ - /// "_id": ObjectId::new(), - /// "bool": true, - /// })?; - /// - /// let oid = doc.get_object_id("_id")?.unwrap(); - /// assert!(matches!(doc.get_object_id("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert!(doc.get_object_id("unknown")?.is_none()); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_object_id(&self, key: &str) -> Result> { - self.get_with(key, RawBson::as_object_id) - } - - /// Gets a reference to the boolean value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a boolean. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, oid::ObjectId, raw::{RawDocument, ErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "_id": ObjectId::new(), - /// "bool": true, - /// })?; - /// - /// assert!(doc.get_bool("bool")?.unwrap()); - /// assert!(matches!(doc.get_bool("_id").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert!(doc.get_object_id("unknown")?.is_none()); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_bool(&self, key: &str) -> Result> { - self.get_with(key, RawBson::as_bool) - } - - /// Gets a reference to the BSON DateTime value corresponding to a given key or returns an error - /// if the key corresponds to a value which isn't a DateTime. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::{ErrorKind, RawDocument}, DateTime}; - /// - /// let dt = DateTime::now(); - /// let doc = RawDocument::from_document(&doc! { - /// "created_at": dt, - /// "bool": true, - /// })?; - /// - /// assert_eq!(doc.get_datetime("created_at")?, Some(dt)); - /// assert!(matches!(doc.get_datetime("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. 
})); - /// assert!(doc.get_datetime("unknown")?.is_none()); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_datetime(&self, key: &str) -> Result> { - self.get_with(key, RawBson::as_datetime) - } - - /// Gets a reference to the BSON regex value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a regex. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, Regex, raw::{RawDocument, ErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "regex": Regex { - /// pattern: r"end\s*$".into(), - /// options: "i".into(), - /// }, - /// "bool": true, - /// })?; - /// - /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); - /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); - /// assert!(matches!(doc.get_regex("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert!(doc.get_regex("unknown")?.is_none()); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_regex<'a>(&'a self, key: &str) -> Result>> { - self.get_with(key, RawBson::as_regex) - } - - /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an - /// error if the key corresponds to a value which isn't a timestamp. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, Timestamp, raw::{RawDocument, ErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "bool": true, - /// "ts": Timestamp { time: 649876543, increment: 9 }, - /// })?; - /// - /// let timestamp = doc.get_timestamp("ts")?.unwrap(); - /// - /// assert_eq!(timestamp.time(), 649876543); - /// assert_eq!(timestamp.increment(), 9); - /// assert!(matches!(doc.get_timestamp("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. 
})); - /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_timestamp<'a>(&'a self, key: &str) -> Result>> { - self.get_with(key, RawBson::as_timestamp) - } - - /// Gets a reference to the BSON int32 value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a 32-bit integer. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::{RawDocument, ErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "bool": true, - /// "i32": 1_000_000, - /// })?; - /// - /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); - /// assert!(matches!(doc.get_i32("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert_eq!(doc.get_i32("unknown"), Ok(None)); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_i32(&self, key: &str) -> Result> { - self.get_with(key, RawBson::as_i32) - } - - /// Gets a reference to the BSON int64 value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a 64-bit integer. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::{ErrorKind, RawDocument}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "bool": true, - /// "i64": 9223372036854775807_i64, - /// })?; - /// - /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); - /// assert!(matches!(doc.get_i64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. 
})); - /// assert_eq!(doc.get_i64("unknown"), Ok(None)); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get_i64(&self, key: &str) -> Result> { - self.get_with(key, RawBson::as_i64) - } + // fn get_with<'a, T>( + // &'a self, + // key: &str, + // f: impl FnOnce(RawBson<'a>) -> Result, + // ) -> Result> { + // self.get(key)?.map(f).transpose() + // } + + // /// Gets a reference to the BSON double value corresponding to a given key or returns an + // error /// if the key corresponds to a value which isn't a double. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::raw::{ErrorKind, RawDocument}; + // /// use bson::doc; + // /// + // /// let doc = RawDocument::from_document(&doc! { + // /// "bool": true, + // /// "f64": 2.5, + // /// })?; + // /// + // /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); + // /// assert!(matches!(doc.get_f64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. + // })); /// assert_eq!(doc.get_f64("unknown"), Ok(None)); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_f64(&self, key: &str) -> Result> { + // self.get_with(key, RawBson::as_f64) + // } + + // /// Gets a reference to the string value corresponding to a given key or returns an error if + // the /// key corresponds to a value which isn't a string. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{doc, raw::{RawDocument, ErrorKind}}; + // /// + // /// let doc = RawDocument::from_document(&doc! { + // /// "string": "hello", + // /// "bool": true, + // /// })?; + // /// + // /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); + // /// assert!(matches!(doc.get_str("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. 
+ // })); /// assert_eq!(doc.get_str("unknown"), Ok(None)); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_str<'a>(&'a self, key: &str) -> Result> { + // self.get_with(key, RawBson::as_str) + // } + + // /// Gets a reference to the document value corresponding to a given key or returns an error + // if /// the key corresponds to a value which isn't a document. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{doc, raw::{ErrorKind, RawDocument}}; + // /// + // /// let doc = RawDocument::from_document(&doc! { + // /// "doc": { "key": "value"}, + // /// "bool": true, + // /// })?; + // /// + // /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), + // Ok(Some("value"))); /// assert!(matches!(doc.get_document("bool").unwrap_err().kind, + // ErrorKind::UnexpectedType { .. })); /// assert!(doc.get_document("unknown")?.is_none()); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_document<'a>(&'a self, key: &str) -> Result> { + // self.get_with(key, RawBson::as_document) + // } + + // /// Gets a reference to the array value corresponding to a given key or returns an error if + // /// the key corresponds to a value which isn't an array. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{doc, raw::RawDocument}; + // /// + // /// let doc = RawDocument::from_document(&doc! 
{ + // /// "array": [true, 3], + // /// "bool": true, + // /// })?; + // /// + // /// let mut arr_iter = doc.get_array("array")?.expect("finding key array").into_iter(); + // /// let _: bool = arr_iter.next().unwrap()?.as_bool()?; + // /// let _: i32 = arr_iter.next().unwrap()?.as_i32()?; + // /// + // /// assert!(arr_iter.next().is_none()); + // /// assert!(doc.get_array("bool").is_err()); + // /// assert!(doc.get_array("unknown")?.is_none()); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_array<'a>(&'a self, key: &str) -> Result> { + // self.get_with(key, RawBson::as_array) + // } + + // /// Gets a reference to the BSON binary value corresponding to a given key or returns an + // error /// if the key corresponds to a value which isn't a binary value. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{ + // /// doc, + // /// raw::{ErrorKind, RawDocument, RawBinary}, + // /// spec::BinarySubtype, + // /// Binary, + // /// }; + // /// + // /// let doc = RawDocument::from_document(&doc! { + // /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, + // /// "bool": true, + // /// })?; + // /// + // /// assert_eq!(doc.get_binary("binary")?.map(RawBinary::as_bytes), Some(&[1, 2, 3][..])); + // /// assert!(matches!(doc.get_binary("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. + // })); /// assert!(doc.get_binary("unknown")?.is_none()); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_binary<'a>(&'a self, key: &str) -> Result>> { + // self.get_with(key, RawBson::as_binary) + // } + + // /// Gets a reference to the ObjectId value corresponding to a given key or returns an error + // if /// the key corresponds to a value which isn't an ObjectId. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{doc, oid::ObjectId, raw::{ErrorKind, RawDocument}}; + // /// + // /// let doc = RawDocument::from_document(&doc! 
{ + // /// "_id": ObjectId::new(), + // /// "bool": true, + // /// })?; + // /// + // /// let oid = doc.get_object_id("_id")?.unwrap(); + // /// assert!(matches!(doc.get_object_id("bool").unwrap_err().kind, ErrorKind::UnexpectedType { + // .. })); /// assert!(doc.get_object_id("unknown")?.is_none()); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_object_id(&self, key: &str) -> Result> { + // self.get_with(key, RawBson::as_object_id) + // } + + // /// Gets a reference to the boolean value corresponding to a given key or returns an error if + // /// the key corresponds to a value which isn't a boolean. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{doc, oid::ObjectId, raw::{RawDocument, ErrorKind}}; + // /// + // /// let doc = RawDocument::from_document(&doc! { + // /// "_id": ObjectId::new(), + // /// "bool": true, + // /// })?; + // /// + // /// assert!(doc.get_bool("bool")?.unwrap()); + // /// assert!(matches!(doc.get_bool("_id").unwrap_err().kind, ErrorKind::UnexpectedType { .. + // })); /// assert!(doc.get_object_id("unknown")?.is_none()); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_bool(&self, key: &str) -> Result> { + // self.get_with(key, RawBson::as_bool) + // } + + // /// Gets a reference to the BSON DateTime value corresponding to a given key or returns an + // error /// if the key corresponds to a value which isn't a DateTime. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{doc, raw::{ErrorKind, RawDocument}, DateTime}; + // /// + // /// let dt = DateTime::now(); + // /// let doc = RawDocument::from_document(&doc! { + // /// "created_at": dt, + // /// "bool": true, + // /// })?; + // /// + // /// assert_eq!(doc.get_datetime("created_at")?, Some(dt)); + // /// assert!(matches!(doc.get_datetime("bool").unwrap_err().kind, ErrorKind::UnexpectedType { + // .. 
})); /// assert!(doc.get_datetime("unknown")?.is_none()); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_datetime(&self, key: &str) -> Result> { + // self.get_with(key, RawBson::as_datetime) + // } + + // /// Gets a reference to the BSON regex value corresponding to a given key or returns an error + // if /// the key corresponds to a value which isn't a regex. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{doc, Regex, raw::{RawDocument, ErrorKind}}; + // /// + // /// let doc = RawDocument::from_document(&doc! { + // /// "regex": Regex { + // /// pattern: r"end\s*$".into(), + // /// options: "i".into(), + // /// }, + // /// "bool": true, + // /// })?; + // /// + // /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); + // /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); + // /// assert!(matches!(doc.get_regex("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. + // })); /// assert!(doc.get_regex("unknown")?.is_none()); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_regex<'a>(&'a self, key: &str) -> Result>> { + // self.get_with(key, RawBson::as_regex) + // } + + // /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an + // /// error if the key corresponds to a value which isn't a timestamp. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{doc, Timestamp, raw::{RawDocument, ErrorKind}}; + // /// + // /// let doc = RawDocument::from_document(&doc! { + // /// "bool": true, + // /// "ts": Timestamp { time: 649876543, increment: 9 }, + // /// })?; + // /// + // /// let timestamp = doc.get_timestamp("ts")?.unwrap(); + // /// + // /// assert_eq!(timestamp.time(), 649876543); + // /// assert_eq!(timestamp.increment(), 9); + // /// assert!(matches!(doc.get_timestamp("bool").unwrap_err().kind, ErrorKind::UnexpectedType { + // .. 
})); /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_timestamp<'a>(&'a self, key: &str) -> Result>> { + // self.get_with(key, RawBson::as_timestamp) + // } + + // /// Gets a reference to the BSON int32 value corresponding to a given key or returns an error + // if /// the key corresponds to a value which isn't a 32-bit integer. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{doc, raw::{RawDocument, ErrorKind}}; + // /// + // /// let doc = RawDocument::from_document(&doc! { + // /// "bool": true, + // /// "i32": 1_000_000, + // /// })?; + // /// + // /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); + // /// assert!(matches!(doc.get_i32("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. + // })); /// assert_eq!(doc.get_i32("unknown"), Ok(None)); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_i32(&self, key: &str) -> Result> { + // self.get_with(key, RawBson::as_i32) + // } + + // /// Gets a reference to the BSON int64 value corresponding to a given key or returns an error + // if /// the key corresponds to a value which isn't a 64-bit integer. + // /// + // /// ``` + // /// # use bson::raw::Error; + // /// use bson::{doc, raw::{ErrorKind, RawDocument}}; + // /// + // /// let doc = RawDocument::from_document(&doc! { + // /// "bool": true, + // /// "i64": 9223372036854775807_i64, + // /// })?; + // /// + // /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); + // /// assert!(matches!(doc.get_i64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. 
+ // })); /// assert_eq!(doc.get_i64("unknown"), Ok(None)); + // /// # Ok::<(), Error>(()) + // /// ``` + // pub fn get_i64(&self, key: &str) -> Result> { + // self.get_with(key, RawBson::as_i64) + // } /// Return a reference to the contained data as a `&[u8]` /// @@ -794,67 +795,82 @@ impl<'a> Iterator for Iter<'a> { } }; - let element_size = match element_type { - ElementType::Double => 8, + let (element, element_size) = match element_type { + ElementType::Double => { + todo!() + // Ok(f64::from_bits(u64::from_le_bytes( + // self.data.try_into().map_err(|_| Error { + // key: None, + // kind: ErrorKind::MalformedValue { + // message: "f64 should be 8 bytes long".into(), + // }, + // })?, + // ))) + } ElementType::String => { - let size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize; - self.verify_null_terminated(valueoffset..(valueoffset + size))?; - size + // let size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize; + // self.verify_null_terminated(valueoffset..(valueoffset + size))?; + let s = read_lenencoded(&self.doc.data[valueoffset..])?; + (RawBson::String(s), s.len() + 1) + // size } ElementType::EmbeddedDocument => { - let size = i32_from_slice(&self.doc.data[valueoffset..])? as usize; - self.verify_null_terminated(valueoffset..(valueoffset + size))?; - size - } - ElementType::Array => { - let size = i32_from_slice(&self.doc.data[valueoffset..])? as usize; - self.verify_null_terminated(valueoffset..(valueoffset + size))?; - size - } - ElementType::Binary => 5 + i32_from_slice(&self.doc.data[valueoffset..])? 
as usize, - ElementType::Undefined => 0, - ElementType::ObjectId => 12, - ElementType::Boolean => 1, - ElementType::DateTime => 8, - ElementType::Null => 0, - ElementType::RegularExpression => { - let regex = match read_nullterminated(&self.doc.data[valueoffset..]) { - Ok(regex) => regex, - Err(err) => return Err(err), - }; - - let options = match read_nullterminated( - &self.doc.data[valueoffset + regex.len() + 1..], - ) { - Ok(options) => options, - Err(err) => return Err(err), - }; - - regex.len() + options.len() + 2 - } - ElementType::DbPointer => { - let string_size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize; - let id_size = 12; - self.verify_null_terminated(valueoffset..(valueoffset + string_size))?; - string_size + id_size - } - ElementType::JavaScriptCode => { - let size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize; - self.verify_null_terminated(valueoffset..(valueoffset + size))?; - size - } - ElementType::Symbol => 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize, - ElementType::JavaScriptCodeWithScope => { - let size = i32_from_slice(&self.doc.data[valueoffset..])? as usize; - self.verify_null_terminated(valueoffset..(valueoffset + size))?; - size + // let size = i32_from_slice(&self.doc.data[valueoffset..])? as usize; + // self.verify_null_terminated(valueoffset..(valueoffset + size))?; + // size + let doc = RawDocumentRef::new(&self.doc.data[valueoffset..])?; + (RawBson::Document(doc), doc.as_bytes().len()) } - ElementType::Int32 => 4, - ElementType::Timestamp => 8, - ElementType::Int64 => 8, - ElementType::Decimal128 => 16, - ElementType::MaxKey => 0, - ElementType::MinKey => 0, + _ => todo!(), + /* ElementType::Array => { + * let size = i32_from_slice(&self.doc.data[valueoffset..])? as usize; + * self.verify_null_terminated(valueoffset..(valueoffset + size))?; + * size + * } + * ElementType::Binary => 5 + i32_from_slice(&self.doc.data[valueoffset..])? 
as + * usize, ElementType::Undefined => 0, + * ElementType::ObjectId => 12, + * ElementType::Boolean => 1, + * ElementType::DateTime => 8, + * ElementType::Null => 0, + * ElementType::RegularExpression => { + * let regex = match read_nullterminated(&self.doc.data[valueoffset..]) { + * Ok(regex) => regex, + * Err(err) => return Err(err), + * }; */ + + /* let options = match read_nullterminated( + * &self.doc.data[valueoffset + regex.len() + 1..], + * ) { + * Ok(options) => options, + * Err(err) => return Err(err), + * }; */ + + /* regex.len() + options.len() + 2 + * } + * ElementType::DbPointer => { + * let string_size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as + * usize; let id_size = 12; + * self.verify_null_terminated(valueoffset..(valueoffset + string_size))?; + * string_size + id_size + * } + * ElementType::JavaScriptCode => { + * let size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize; + * self.verify_null_terminated(valueoffset..(valueoffset + size))?; + * size + * } + * ElementType::Symbol => 4 + i32_from_slice(&self.doc.data[valueoffset..])? as + * usize, ElementType::JavaScriptCodeWithScope => { + * let size = i32_from_slice(&self.doc.data[valueoffset..])? 
as usize; + * self.verify_null_terminated(valueoffset..(valueoffset + size))?; + * size + * } + * ElementType::Int32 => 4, + * ElementType::Timestamp => 8, + * ElementType::Int64 => 8, + * ElementType::Decimal128 => 16, + * ElementType::MaxKey => 0, + * ElementType::MinKey => 0, */ }; let nextoffset = valueoffset + element_size; @@ -862,10 +878,7 @@ impl<'a> Iterator for Iter<'a> { self.verify_in_range(valueoffset..nextoffset)?; - Ok(( - key, - RawBson::new(element_type, &self.doc.data[valueoffset..nextoffset]), - )) + Ok((key, element)) }); Some(kvp_result) diff --git a/src/raw/elem.rs b/src/raw/elem.rs index a52716eb..e96c2456 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -1,7 +1,7 @@ use std::convert::{TryFrom, TryInto}; // use chrono::{DateTime, TimeZone, Utc}; -use crate::{de::read_bool, DateTime, Decimal128}; +use crate::{de::read_bool, oid, DateTime, DbPointer, Decimal128, Timestamp}; #[cfg(feature = "decimal128")] use super::d128_from_slice; @@ -24,277 +24,436 @@ use crate::{ }; /// A BSON value referencing raw bytes stored elsewhere. 
-#[derive(Clone, Copy, Debug)] -pub struct RawBson<'a> { - element_type: ElementType, - data: &'a [u8], +#[derive(Clone, Copy)] +pub enum RawBson<'a> { + /// 64-bit binary floating point + Double(f64), + /// UTF-8 string + String(&'a str), + /// Array + Array(&'a RawArray), + /// Embedded document + Document(&'a RawDocumentRef), + /// Boolean value + Boolean(bool), + /// Null value + Null, + /// Regular expression + RegularExpression(RawRegex<'a>), + /// JavaScript code + JavaScriptCode(&'a str), + /// JavaScript code w/ scope + JavaScriptCodeWithScope(RawJavaScriptCodeWithScope<'a>), + /// 32-bit signed integer + Int32(i32), + /// 64-bit signed integer + Int64(i64), + /// Timestamp + Timestamp(Timestamp), + /// Binary data + Binary(RawBinary<'a>), + /// [ObjectId](http://dochub.mongodb.org/core/objectids) + ObjectId(oid::ObjectId), + /// UTC datetime + DateTime(crate::DateTime), + /// Symbol (Deprecated) + Symbol(&'a str), + /// [128-bit decimal floating point](https://github.com/mongodb/specifications/blob/master/source/bson-decimal128/decimal128.rst) + Decimal128(Decimal128), + /// Undefined value (Deprecated) + Undefined, + /// Max key + MaxKey, + /// Min key + MinKey, + // TODO: this + /// DBPointer (Deprecated) + DbPointer(u8), } +// #[derive(Clone, Copy, Debug)] +// pub struct RawBson<'a> { +// element_type: ElementType, +// data: &'a [u8], +// } + impl<'a> RawBson<'a> { - pub(super) fn new(element_type: ElementType, data: &'a [u8]) -> RawBson<'a> { - RawBson { element_type, data } - } + // pub(super) fn new(element_type: ElementType, data: &'a [u8]) -> RawBson<'a> { + // RawBson { element_type, data } + // } /// Gets the type of the value. pub fn element_type(self) -> ElementType { - self.element_type - } - - /// Gets a reference to the raw bytes of the value. 
- pub fn as_bytes(self) -> &'a [u8] { - self.data - } - - fn validate_type(self, expected: ElementType) -> Result<()> { - if self.element_type != expected { - return Err(Error { - key: None, - kind: ErrorKind::UnexpectedType { - actual: self.element_type, - expected, - }, - }); - } - Ok(()) - } + // self.element_type + todo!() + } + + // /// Gets a reference to the raw bytes of the value. + // pub fn as_bytes(self) -> &'a [u8] { + // self.data + // } + + // fn validate_type(self, expected: ElementType) -> Result<()> { + // if self.element_type != expected { + // return Err(Error { + // key: None, + // kind: ErrorKind::UnexpectedType { + // actual: self.element_type, + // expected, + // }, + // }); + // } + // Ok(()) + // } /// Gets the f64 that's referenced or returns an error if the value isn't a BSON double. - pub fn as_f64(self) -> Result { - self.validate_type(ElementType::Double)?; - Ok(f64::from_bits(u64::from_le_bytes( - self.data.try_into().map_err(|_| Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "f64 should be 8 bytes long".into(), - }, - })?, - ))) + pub fn as_f64(self) -> Option { + match self { + RawBson::Double(d) => Some(d), + _ => None, + } } - /// Gets the string that's referenced or returns an error if the value isn't a BSON string. - pub fn as_str(self) -> Result<&'a str> { - self.validate_type(ElementType::String)?; - read_lenencoded(self.data) + /// If `Bson` is `String`, return its value as a `&str`. Returns `None` otherwise + pub fn as_str(self) -> Option<&'a str> { + match self { + RawBson::String(s) => Some(s), + _ => None, + } } - /// Gets the document that's referenced or returns an error if the value isn't a BSON document. - pub fn as_document(self) -> Result<&'a RawDocumentRef> { - self.validate_type(ElementType::EmbeddedDocument)?; - RawDocumentRef::new(self.data) + /// If `Bson` is `Array`, return its value. 
Returns `None` otherwise + pub fn as_array(self) -> Option<&'a RawArray> { + match self { + RawBson::Array(v) => Some(v), + _ => None, + } } - /// Gets the array that's referenced or returns an error if the value isn't a BSON array. - pub fn as_array(self) -> Result<&'a RawArray> { - self.validate_type(ElementType::Array)?; - RawArray::new(self.data) + /// If `Bson` is `Document`, return its value. Returns `None` otherwise + pub fn as_document(self) -> Option<&'a RawDocumentRef> { + match self { + RawBson::Document(v) => Some(v), + _ => None, + } } - /// Gets the BSON binary value that's referenced or returns an error if the value a BSON binary. - pub fn as_binary(self) -> Result> { - self.validate_type(ElementType::Binary)?; - - let length = i32_from_slice(&self.data[0..4])?; - let subtype = BinarySubtype::from(self.data[4]); - if self.data.len() as i32 != length + 5 { - return Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "binary bson has wrong declared length".into(), - }, - }); + /// If `Bson` is `Bool`, return its value. Returns `None` otherwise + pub fn as_bool(self) -> Option { + match self { + RawBson::Boolean(v) => Some(v), + _ => None, } - let data = match subtype { - BinarySubtype::BinaryOld => { - if length < 4 { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "old binary subtype has no inner declared length".into(), - })); - } - let oldlength = i32_from_slice(&self.data[5..9])?; - if oldlength + 4 != length { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "old binary subtype has wrong inner declared length".into(), - })); - } - &self.data[9..] - } - _ => &self.data[5..], - }; - Ok(RawBinary::new(subtype, data)) } - /// Gets the ObjectId that's referenced or returns an error if the value isn't a BSON ObjectId. 
- pub fn as_object_id(self) -> Result { - self.validate_type(ElementType::ObjectId)?; - Ok(ObjectId::from_bytes(self.data.try_into().map_err( - |_| { - Error::new_without_key(ErrorKind::MalformedValue { - message: "object id should be 12 bytes long".into(), - }) - }, - )?)) + /// If `Bson` is `I32`, return its value. Returns `None` otherwise + pub fn as_i32(self) -> Option { + match self { + RawBson::Int32(v) => Some(v), + _ => None, + } } - /// Gets the boolean that's referenced or returns an error if the value isn't a BSON boolean. - pub fn as_bool(self) -> Result { - self.validate_type(ElementType::Boolean)?; - if self.data.len() != 1 { - Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "boolean has length != 1".into(), - })) - } else { - read_bool(self.data).map_err(|e| { - Error::new_without_key(ErrorKind::MalformedValue { - message: e.to_string(), - }) - }) + /// If `Bson` is `I64`, return its value. Returns `None` otherwise + pub fn as_i64(self) -> Option { + match self { + RawBson::Int64(v) => Some(v), + _ => None, } } - /// Gets the DateTime that's referenced or returns an error if the value isn't a BSON DateTime. - pub fn as_datetime(self) -> Result { - self.validate_type(ElementType::DateTime)?; - let millis = i64_from_slice(self.data)?; - Ok(DateTime::from_millis(millis)) + /// If `Bson` is `Objectid`, return its value. Returns `None` otherwise + pub fn as_object_id(self) -> Option { + match self { + RawBson::ObjectId(v) => Some(v), + _ => None, + } } - /// Gets the regex that's referenced or returns an error if the value isn't a BSON regex. - pub fn as_regex(self) -> Result> { - self.validate_type(ElementType::RegularExpression)?; - RawRegex::new(self.data) + /// If `Bson` is `Binary`, return its value. 
Returns `None` otherwise + pub fn as_binary(self) -> Option> { + match self { + RawBson::Binary(v) => Some(v), + _ => None, + } } - /// Gets the BSON JavaScript code that's referenced or returns an error if the value isn't BSON - /// JavaScript code. - pub fn as_javascript(self) -> Result<&'a str> { - self.validate_type(ElementType::JavaScriptCode)?; - read_lenencoded(self.data) + /// If `Bson` is `Regex`, return its value. Returns `None` otherwise + pub fn as_regex(self) -> Option> { + match self { + RawBson::RegularExpression(v) => Some(v), + _ => None, + } } - /// Gets the symbol that's referenced or returns an error if the value isn't a BSON symbol. - pub fn as_symbol(self) -> Result<&'a str> { - self.validate_type(ElementType::Symbol)?; - read_lenencoded(self.data) + /// If `Bson` is `DateTime`, return its value. Returns `None` otherwise + pub fn as_datetime(self) -> Option { + match self { + RawBson::DateTime(v) => Some(v), + _ => None, + } } - /// Gets the BSON JavaScript code with scope that's referenced or returns an error if the value - /// isn't BSON JavaScript code with scope. - pub fn as_javascript_with_scope(self) -> Result> { - self.validate_type(ElementType::JavaScriptCodeWithScope)?; - let length = i32_from_slice(&self.data[..4])?; - - if (self.data.len() as i32) != length { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: format!("TODO: Java"), - })); + /// If `Bson` is `Symbol`, return its value. Returns `None` otherwise + pub fn as_symbol(self) -> Option<&'a str> { + match self { + RawBson::Symbol(v) => Some(v), + _ => None, } - - let code = read_lenencoded(&self.data[4..])?; - let scope = RawDocumentRef::new(&self.data[9 + code.len()..])?; - - Ok(RawJavaScriptCodeWithScope { code, scope }) } - /// Gets the timestamp that's referenced or returns an error if the value isn't a BSON - /// timestamp. 
- pub fn as_timestamp(self) -> Result> { - self.validate_type(ElementType::Timestamp)?; - assert_eq!(self.data.len(), 8); - Ok(RawTimestamp { data: self.data }) + /// If `Bson` is `Timestamp`, return its value. Returns `None` otherwise + pub fn as_timestamp(self) -> Option { + match self { + RawBson::Timestamp(timestamp) => Some(timestamp), + _ => None, + } } - /// Gets the i32 that's referenced or returns an error if the value isn't a BSON int32. - pub fn as_i32(self) -> Result { - self.validate_type(ElementType::Int32)?; - i32_from_slice(self.data) + /// If `Bson` is `Null`, return its value. Returns `None` otherwise + pub fn as_null(self) -> Option<()> { + match self { + RawBson::Null => Some(()), + _ => None, + } } - /// Gets the i64 that's referenced or returns an error if the value isn't a BSON int64. - pub fn as_i64(self) -> Result { - self.validate_type(ElementType::Int64)?; - i64_from_slice(self.data) + pub fn as_db_pointer(self) -> Option { + // match self { + // Bson::DbPointer(db_pointer) => Some(db_pointer), + // _ => None, + // } + todo!() } - /// Gets the decimal that's referenced or returns an error if the value isn't a BSON Decimal128. - pub fn as_decimal128(self) -> Result { - self.validate_type(ElementType::Decimal128)?; - let bytes: [u8; 128 / 8] = self.data.try_into().map_err(|_| { - Error::new_without_key(ErrorKind::MalformedValue { - message: format!("decimal128 value has invalid length: {}", self.data.len()), - }) - })?; - Ok(Decimal128::from_bytes(bytes)) + /// If `Bson` is `JavaScriptCode`, return its value. Returns `None` otherwise + pub fn as_javascript(self) -> Option<&'a str> { + match self { + RawBson::JavaScriptCode(s) => Some(s), + _ => None, + } } - /// Gets the null value that's referenced or returns an error if the value isn't a BSON null. - pub fn as_null(self) -> Result<()> { - self.validate_type(ElementType::Null) + /// If `Bson` is `JavaScriptCodeWithScope`, return its value. 
Returns `None` otherwise + pub fn as_javascript_with_scope(self) -> Option> { + match self { + RawBson::JavaScriptCodeWithScope(s) => Some(s), + _ => None, + } } } +// impl<'a> RawBson<'a> { + +// /// Gets the string that's referenced or returns an error if the value isn't a BSON string. +// pub fn as_str(self) -> Result<&'a str> { +// self.validate_type(ElementType::String)?; +// read_lenencoded(self.data) +// } + +// /// Gets the document that's referenced or returns an error if the value isn't a BSON +// document. pub fn as_document(self) -> Result<&'a RawDocumentRef> { +// self.validate_type(ElementType::EmbeddedDocument)?; +// RawDocumentRef::new(self.data) +// } + +// /// Gets the array that's referenced or returns an error if the value isn't a BSON array. +// pub fn as_array(self) -> Result<&'a RawArray> { +// self.validate_type(ElementType::Array)?; +// RawArray::new(self.data) +// } + +// /// Gets the BSON binary value that's referenced or returns an error if the value a BSON +// binary. pub fn as_binary(self) -> Result> { +// self.validate_type(ElementType::Binary)?; + +// let length = i32_from_slice(&self.data[0..4])?; +// let subtype = BinarySubtype::from(self.data[4]); +// if self.data.len() as i32 != length + 5 { +// return Err(Error { +// key: None, +// kind: ErrorKind::MalformedValue { +// message: "binary bson has wrong declared length".into(), +// }, +// }); +// } +// let data = match subtype { +// BinarySubtype::BinaryOld => { +// if length < 4 { +// return Err(Error::new_without_key(ErrorKind::MalformedValue { +// message: "old binary subtype has no inner declared length".into(), +// })); +// } +// let oldlength = i32_from_slice(&self.data[5..9])?; +// if oldlength + 4 != length { +// return Err(Error::new_without_key(ErrorKind::MalformedValue { +// message: "old binary subtype has wrong inner declared length".into(), +// })); +// } +// &self.data[9..] 
+// } +// _ => &self.data[5..], +// }; +// Ok(RawBinary::new(subtype, data)) +// } + +// /// Gets the ObjectId that's referenced or returns an error if the value isn't a BSON +// ObjectId. pub fn as_object_id(self) -> Result { +// self.validate_type(ElementType::ObjectId)?; +// Ok(ObjectId::from_bytes(self.data.try_into().map_err( +// |_| { +// Error::new_without_key(ErrorKind::MalformedValue { +// message: "object id should be 12 bytes long".into(), +// }) +// }, +// )?)) +// } + +// /// Gets the boolean that's referenced or returns an error if the value isn't a BSON boolean. +// pub fn as_bool(self) -> Result { +// self.validate_type(ElementType::Boolean)?; +// if self.data.len() != 1 { +// Err(Error::new_without_key(ErrorKind::MalformedValue { +// message: "boolean has length != 1".into(), +// })) +// } else { +// read_bool(self.data).map_err(|e| { +// Error::new_without_key(ErrorKind::MalformedValue { +// message: e.to_string(), +// }) +// }) +// } +// } + +// /// Gets the DateTime that's referenced or returns an error if the value isn't a BSON +// DateTime. pub fn as_datetime(self) -> Result { +// self.validate_type(ElementType::DateTime)?; +// let millis = i64_from_slice(self.data)?; +// Ok(DateTime::from_millis(millis)) +// } + +// /// Gets the regex that's referenced or returns an error if the value isn't a BSON regex. +// pub fn as_regex(self) -> Result> { +// self.validate_type(ElementType::RegularExpression)?; +// RawRegex::new(self.data) +// } + +// /// Gets the BSON JavaScript code that's referenced or returns an error if the value isn't +// BSON /// JavaScript code. +// pub fn as_javascript(self) -> Result<&'a str> { +// self.validate_type(ElementType::JavaScriptCode)?; +// read_lenencoded(self.data) +// } + +// /// Gets the symbol that's referenced or returns an error if the value isn't a BSON symbol. 
+// pub fn as_symbol(self) -> Result<&'a str> { +// self.validate_type(ElementType::Symbol)?; +// read_lenencoded(self.data) +// } + +// /// Gets the BSON JavaScript code with scope that's referenced or returns an error if the +// value /// isn't BSON JavaScript code with scope. +// pub fn as_javascript_with_scope(self) -> Result> { +// self.validate_type(ElementType::JavaScriptCodeWithScope)?; +// let length = i32_from_slice(&self.data[..4])?; + +// if (self.data.len() as i32) != length { +// return Err(Error::new_without_key(ErrorKind::MalformedValue { +// message: format!("TODO: Java"), +// })); +// } + +// let code = read_lenencoded(&self.data[4..])?; +// let scope = RawDocumentRef::new(&self.data[9 + code.len()..])?; + +// Ok(RawJavaScriptCodeWithScope { code, scope }) +// } + +// /// Gets the timestamp that's referenced or returns an error if the value isn't a BSON +// /// timestamp. +// pub fn as_timestamp(self) -> Result> { +// self.validate_type(ElementType::Timestamp)?; +// assert_eq!(self.data.len(), 8); +// Ok(RawTimestamp { data: self.data }) +// } + +// /// Gets the i32 that's referenced or returns an error if the value isn't a BSON int32. +// pub fn as_i32(self) -> Result { +// self.validate_type(ElementType::Int32)?; +// i32_from_slice(self.data) +// } + +// /// Gets the i64 that's referenced or returns an error if the value isn't a BSON int64. +// pub fn as_i64(self) -> Result { +// self.validate_type(ElementType::Int64)?; +// i64_from_slice(self.data) +// } + +// /// Gets the decimal that's referenced or returns an error if the value isn't a BSON +// Decimal128. 
pub fn as_decimal128(self) -> Result { +// self.validate_type(ElementType::Decimal128)?; +// let bytes: [u8; 128 / 8] = self.data.try_into().map_err(|_| { +// Error::new_without_key(ErrorKind::MalformedValue { +// message: format!("decimal128 value has invalid length: {}", self.data.len()), +// }) +// })?; +// Ok(Decimal128::from_bytes(bytes)) +// } + +// /// Gets the null value that's referenced or returns an error if the value isn't a BSON null. +// pub fn as_null(self) -> Result<()> { +// self.validate_type(ElementType::Null) +// } +// } + // TODO: finish implementation impl<'a> TryFrom> for Bson { type Error = Error; fn try_from(rawbson: RawBson<'a>) -> Result { - Ok(match rawbson.element_type { - ElementType::Double => Bson::Double(rawbson.as_f64()?), - ElementType::String => Bson::String(String::from(rawbson.as_str()?)), - ElementType::EmbeddedDocument => { - let rawdoc = rawbson.as_document()?; + Ok(match rawbson { + RawBson::Double(d) => Bson::Double(d), + RawBson::String(s) => Bson::String(s.to_string()), + RawBson::Document(rawdoc) => { let doc = rawdoc.try_into()?; Bson::Document(doc) } - ElementType::Array => { - let rawarray = rawbson.as_array()?; - let v = rawarray.try_into()?; - Bson::Array(v) + RawBson::Array(rawarray) => { + todo!() + // let v = rawarray.try_into()?; + // Bson::Array(v) } - ElementType::Binary => { - let RawBinary { subtype, data } = rawbson.as_binary()?; + RawBson::Binary(rawbson) => { + let RawBinary { subtype, data } = rawbson; Bson::Binary(crate::Binary { subtype, bytes: data.to_vec(), }) } - ElementType::ObjectId => Bson::ObjectId(rawbson.as_object_id()?), - ElementType::Boolean => Bson::Boolean(rawbson.as_bool()?), - ElementType::DateTime => Bson::DateTime(rawbson.as_datetime()?), - ElementType::Null => Bson::Null, - ElementType::RegularExpression => { - let rawregex = rawbson.as_regex()?; - Bson::RegularExpression(crate::Regex { - pattern: String::from(rawregex.pattern()), - options: String::from(rawregex.options()), - }) - 
} - ElementType::JavaScriptCode => { - Bson::JavaScriptCode(String::from(rawbson.as_javascript()?)) - } - ElementType::Int32 => Bson::Int32(rawbson.as_i32()?), - ElementType::Timestamp => { - // RawBson::as_timestamp() returns u64, but Bson::Timestamp expects i64 - let ts = rawbson.as_timestamp()?; - Bson::Timestamp(crate::Timestamp { - time: ts.time(), - increment: ts.increment(), - }) - } - ElementType::Int64 => Bson::Int64(rawbson.as_i64()?), - ElementType::Undefined => Bson::Null, - ElementType::DbPointer => panic!("Uh oh. Maybe this should be a TryFrom"), - ElementType::Symbol => Bson::Symbol(String::from(rawbson.as_symbol()?)), - ElementType::JavaScriptCodeWithScope => { - let RawJavaScriptCodeWithScope { code, scope } = - rawbson.as_javascript_with_scope()?; + RawBson::ObjectId(rawbson) => Bson::ObjectId(rawbson), + RawBson::Boolean(rawbson) => Bson::Boolean(rawbson), + RawBson::DateTime(rawbson) => Bson::DateTime(rawbson), + RawBson::Null => Bson::Null, + RawBson::RegularExpression(rawregex) => Bson::RegularExpression(crate::Regex { + pattern: String::from(rawregex.pattern()), + options: String::from(rawregex.options()), + }), + RawBson::JavaScriptCode(rawbson) => Bson::JavaScriptCode(rawbson.to_string()), + RawBson::Int32(rawbson) => Bson::Int32(rawbson), + RawBson::Timestamp(rawbson) => Bson::Timestamp(rawbson), + RawBson::Int64(rawbson) => Bson::Int64(rawbson), + RawBson::Undefined => Bson::Undefined, + RawBson::DbPointer(rawbson) => todo!("TODO finish this"), + RawBson::Symbol(rawbson) => Bson::Symbol(rawbson.to_string()), + RawBson::JavaScriptCodeWithScope(rawbson) => { Bson::JavaScriptCodeWithScope(crate::JavaScriptCodeWithScope { - code: String::from(code), - scope: scope.try_into()?, + code: rawbson.code.to_string(), + scope: rawbson.scope.try_into()?, }) } - ElementType::Decimal128 => Bson::Decimal128(rawbson.as_decimal128()?), - ElementType::MaxKey => unimplemented!(), - ElementType::MinKey => unimplemented!(), + RawBson::Decimal128(rawbson) => 
Bson::Decimal128(rawbson), + RawBson::MaxKey => Bson::MaxKey, + RawBson::MinKey => Bson::MinKey, }) } } diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index d1ae45dd..9def612b 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -346,8 +346,8 @@ fn timestamp() { .as_timestamp() .expect("was not a timestamp"); - assert_eq!(ts.increment(), 7); - assert_eq!(ts.time(), 3542578); + assert_eq!(ts.increment, 7); + assert_eq!(ts.time, 3542578); } #[test] @@ -416,7 +416,7 @@ fn into_bson_conversion() { "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, "boolean": false, }); - let rawbson = elem::RawBson::new(ElementType::EmbeddedDocument, &docbytes); + let rawbson = RawBson::Document(RawDocumentRef::new(docbytes.as_slice()).unwrap()); let b: Bson = rawbson.try_into().expect("invalid bson"); let doc = b.as_document().expect("not a document"); assert_eq!(*doc.get("f64").expect("f64 not found"), Bson::Double(2.5)); From 24dc86a7a21d6b9c0536f1aa5c8b90e5ee5c5a68 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 11 Oct 2021 20:01:31 -0400 Subject: [PATCH 27/48] all but dbpointer done --- src/raw/array.rs | 4 +- src/raw/doc.rs | 227 +++++++++++++++++++++++++++++++------------- src/raw/elem.rs | 13 ++- src/raw/mod.rs | 15 +++ src/raw/test/mod.rs | 24 +++-- 5 files changed, 200 insertions(+), 83 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index 50db8968..07f5ccde 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -6,7 +6,7 @@ use crate::{oid::ObjectId, Bson, DateTime, Timestamp}; /// A BSON array referencing raw bytes stored elsewhere. 
#[repr(transparent)] pub struct RawArray { - doc: RawDocumentRef, + pub(crate) doc: RawDocumentRef, } impl RawArray { @@ -14,7 +14,7 @@ impl RawArray { Ok(RawArray::from_doc(RawDocumentRef::new(data)?)) } - fn from_doc(doc: &RawDocumentRef) -> &RawArray { + pub(crate) fn from_doc(doc: &RawDocumentRef) -> &RawArray { // SAFETY: // // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 2191ca04..2d9330aa 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -5,8 +5,17 @@ use std::{ }; use crate::{ - raw::error::{try_with_key, ErrorKind}, + de::read_bool, + raw::{ + error::{try_with_key, ErrorKind}, + f64_from_slice, + i64_from_slice, + RawJavaScriptCodeWithScope, + }, + spec::BinarySubtype, DateTime, + Decimal128, + Timestamp, }; use super::{ @@ -298,6 +307,7 @@ impl RawDocumentRef { } let length = i32_from_slice(&data)?; + println!("got length {}", length); if data.len() as i32 != length { return Err(Error { @@ -752,6 +762,23 @@ impl<'a> Iter<'a> { } Ok(()) } + + fn next_oid(&self, starting_at: usize) -> Result { + self.verify_in_range(starting_at..(starting_at + 12))?; + let oid = ObjectId::from_bytes( + self.doc.data[starting_at..(starting_at + 12)] + .try_into() + .unwrap(), // ok because we know slice is 12 bytes long + ); + Ok(oid) + } + + fn next_document(&self, starting_at: usize) -> Result<&'a RawDocumentRef> { + let size = i32_from_slice(&self.doc.data[starting_at..])? 
as usize; + let range = starting_at..(starting_at + size); + self.verify_null_terminated(range.clone())?; + RawDocumentRef::new(&self.doc.data[range]) + } } impl<'a> Iterator for Iter<'a> { @@ -780,6 +807,8 @@ impl<'a> Iterator for Iter<'a> { Err(e) => return Some(Err(e)), }; + println!("iterating {}", key); + let kvp_result = try_with_key(key, || { let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 @@ -795,82 +824,144 @@ impl<'a> Iterator for Iter<'a> { } }; + println!("et: {:?}", element_type); + let (element, element_size) = match element_type { + ElementType::Int32 => { + let i = i32_from_slice(&self.doc.data[valueoffset..])?; + (RawBson::Int32(i), 4) + } + ElementType::Int64 => { + let i = i64_from_slice(&self.doc.data[valueoffset..])?; + (RawBson::Int64(i), 8) + } ElementType::Double => { - todo!() - // Ok(f64::from_bits(u64::from_le_bytes( - // self.data.try_into().map_err(|_| Error { - // key: None, - // kind: ErrorKind::MalformedValue { - // message: "f64 should be 8 bytes long".into(), - // }, - // })?, - // ))) + let f = f64_from_slice(&self.doc.data[valueoffset..])?; + (RawBson::Double(f), 8) } ElementType::String => { - // let size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize; - // self.verify_null_terminated(valueoffset..(valueoffset + size))?; let s = read_lenencoded(&self.doc.data[valueoffset..])?; - (RawBson::String(s), s.len() + 1) - // size + (RawBson::String(s), 4 + s.len() + 1) } ElementType::EmbeddedDocument => { - // let size = i32_from_slice(&self.doc.data[valueoffset..])? as usize; - // self.verify_null_terminated(valueoffset..(valueoffset + size))?; - // size - let doc = RawDocumentRef::new(&self.doc.data[valueoffset..])?; + let doc = self.next_document(valueoffset)?; (RawBson::Document(doc), doc.as_bytes().len()) } - _ => todo!(), - /* ElementType::Array => { - * let size = i32_from_slice(&self.doc.data[valueoffset..])? 
as usize; - * self.verify_null_terminated(valueoffset..(valueoffset + size))?; - * size - * } - * ElementType::Binary => 5 + i32_from_slice(&self.doc.data[valueoffset..])? as - * usize, ElementType::Undefined => 0, - * ElementType::ObjectId => 12, - * ElementType::Boolean => 1, - * ElementType::DateTime => 8, - * ElementType::Null => 0, - * ElementType::RegularExpression => { - * let regex = match read_nullterminated(&self.doc.data[valueoffset..]) { - * Ok(regex) => regex, - * Err(err) => return Err(err), - * }; */ - - /* let options = match read_nullterminated( - * &self.doc.data[valueoffset + regex.len() + 1..], - * ) { - * Ok(options) => options, - * Err(err) => return Err(err), - * }; */ - - /* regex.len() + options.len() + 2 - * } - * ElementType::DbPointer => { - * let string_size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as - * usize; let id_size = 12; - * self.verify_null_terminated(valueoffset..(valueoffset + string_size))?; - * string_size + id_size - * } - * ElementType::JavaScriptCode => { - * let size = 4 + i32_from_slice(&self.doc.data[valueoffset..])? as usize; - * self.verify_null_terminated(valueoffset..(valueoffset + size))?; - * size - * } - * ElementType::Symbol => 4 + i32_from_slice(&self.doc.data[valueoffset..])? as - * usize, ElementType::JavaScriptCodeWithScope => { - * let size = i32_from_slice(&self.doc.data[valueoffset..])? as usize; - * self.verify_null_terminated(valueoffset..(valueoffset + size))?; - * size - * } - * ElementType::Int32 => 4, - * ElementType::Timestamp => 8, - * ElementType::Int64 => 8, - * ElementType::Decimal128 => 16, - * ElementType::MaxKey => 0, - * ElementType::MinKey => 0, */ + ElementType::Array => { + let doc = self.next_document(valueoffset)?; + ( + RawBson::Array(RawArray::from_doc(doc)), + doc.as_bytes().len(), + ) + } + ElementType::Binary => { + let len = i32_from_slice(&self.doc.data[valueoffset..])? 
as usize; + let data_start = valueoffset + 4 + 1; + self.verify_in_range(valueoffset..(data_start + len))?; + let subtype = BinarySubtype::from(self.doc.data[valueoffset + 4]); + let data = match subtype { + BinarySubtype::BinaryOld => { + if len < 4 { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "old binary subtype has no inner declared length" + .into(), + })); + } + let oldlength = i32_from_slice(&self.doc.data[data_start..])? as usize; + if oldlength + 4 != len { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "old binary subtype has wrong inner declared length" + .into(), + })); + } + &self.doc.data[(data_start + 4)..(data_start + len)] + } + _ => &self.doc.data[data_start..(data_start + len)], + }; + (RawBson::Binary(RawBinary { subtype, data }), 4 + 1 + len) + } + ElementType::ObjectId => { + let oid = self.next_oid(valueoffset)?; + (RawBson::ObjectId(oid), 12) + } + ElementType::Boolean => { + let b = read_bool(&self.doc.data[valueoffset..]).map_err(|e| { + Error::new_with_key( + key, + ErrorKind::MalformedValue { + message: e.to_string(), + }, + ) + })?; + (RawBson::Boolean(b), 1) + } + ElementType::DateTime => { + let ms = i64_from_slice(&self.doc.data[valueoffset..])?; + (RawBson::DateTime(DateTime::from_millis(ms)), 8) + } + ElementType::RegularExpression => { + let pattern = read_nullterminated(&self.doc.data[valueoffset..])?; + let options = + read_nullterminated(&self.doc.data[(valueoffset + pattern.len() + 1)..])?; + ( + RawBson::RegularExpression(RawRegex { pattern, options }), + pattern.len() + 1 + options.len() + 1, + ) + } + ElementType::Null => (RawBson::Null, 0), + ElementType::Undefined => (RawBson::Undefined, 0), + ElementType::Timestamp => { + let ts = + Timestamp::from_reader(&self.doc.data[valueoffset..]).map_err(|e| { + Error::new_without_key(ErrorKind::MalformedValue { + message: e.to_string(), + }) + })?; + (RawBson::Timestamp(ts), 8) + } + ElementType::JavaScriptCode => { + let 
code = read_lenencoded(&self.doc.data[valueoffset..])?; + (RawBson::JavaScriptCode(code), 4 + code.len() + 1) + } + ElementType::JavaScriptCodeWithScope => { + let length = i32_from_slice(&self.doc.data[valueoffset..])? as usize; + self.verify_in_range(valueoffset..(valueoffset + length))?; + let code = read_lenencoded(&self.doc.data[valueoffset + 4..])?; + + let scope_start = valueoffset + 4 + 4 + code.len() + 1; + let scope = + RawDocumentRef::new(&self.doc.data[scope_start..(valueoffset + length)])?; + ( + RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope { + code, + scope, + }), + length, + ) + } + ElementType::DbPointer => { + let ns = read_lenencoded(&self.doc.data[valueoffset..])?; + let oid = self.next_oid(valueoffset + 4 + ns.len() + 1)?; + // TODO: fix this + (RawBson::DbPointer(0), 4 + ns.len() + 1 + 12) + } + ElementType::Symbol => { + let s = read_lenencoded(&self.doc.data[valueoffset..])?; + (RawBson::Symbol(s), 4 + s.len() + 1) + } + ElementType::Decimal128 => { + self.verify_in_range(valueoffset..(valueoffset + 16))?; + ( + RawBson::Decimal128(Decimal128::from_bytes( + self.doc.data[valueoffset..(valueoffset + 16)] + .try_into() + .unwrap(), + )), + 16, + ) + } + ElementType::MinKey => (RawBson::MinKey, 0), + ElementType::MaxKey => (RawBson::MaxKey, 0), }; let nextoffset = valueoffset + element_size; diff --git a/src/raw/elem.rs b/src/raw/elem.rs index e96c2456..56b1cd20 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -1,7 +1,7 @@ use std::convert::{TryFrom, TryInto}; // use chrono::{DateTime, TimeZone, Utc}; -use crate::{de::read_bool, oid, DateTime, DbPointer, Decimal128, Timestamp}; +use crate::{de::read_bool, oid, DateTime, DbPointer, Decimal128, Document, Timestamp}; #[cfg(feature = "decimal128")] use super::d128_from_slice; @@ -419,9 +419,8 @@ impl<'a> TryFrom> for Bson { Bson::Document(doc) } RawBson::Array(rawarray) => { - todo!() - // let v = rawarray.try_into()?; - // Bson::Array(v) + let doc: Document = 
rawarray.doc.try_into()?; + Bson::Array(doc.into_iter().map(|(_k, v)| v).collect()) } RawBson::Binary(rawbson) => { let RawBinary { subtype, data } = rawbson; @@ -459,7 +458,7 @@ impl<'a> TryFrom> for Bson { } /// A BSON binary value referencing raw bytes stored elsewhere. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq)] pub struct RawBinary<'a> { pub(super) subtype: BinarySubtype, pub(super) data: &'a [u8], @@ -540,8 +539,8 @@ impl<'a> RawTimestamp<'a> { /// A BSON "code with scope" value referencing raw bytes stored elsewhere. #[derive(Clone, Copy, Debug)] pub struct RawJavaScriptCodeWithScope<'a> { - code: &'a str, - scope: &'a RawDocumentRef, + pub(crate) code: &'a str, + pub(crate) scope: &'a RawDocumentRef, } impl<'a> RawJavaScriptCodeWithScope<'a> { diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 58069d35..91755a45 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -116,6 +116,21 @@ pub use self::{ error::{Error, ErrorKind, Result}, }; +/// Given a u8 slice, return an i32 calculated from the first four bytes in +/// little endian order. +fn f64_from_slice(val: &[u8]) -> Result { + let arr = val + .get(0..8) + .and_then(|s| s.try_into().ok()) + .ok_or_else(|| Error { + key: None, + kind: ErrorKind::MalformedValue { + message: format!("expected 8 bytes to read double, instead got {}", val.len()), + }, + })?; + Ok(f64::from_le_bytes(arr)) +} + /// Given a u8 slice, return an i32 calculated from the first four bytes in /// little endian order. fn i32_from_slice(val: &[u8]) -> Result { diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index 9def612b..25de2a9b 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -39,16 +39,18 @@ fn nested_document() { let docbytes = to_bytes(&doc! 
{ "outer": { "inner": "surprise", + "double": 5.5, }, }); let rawdoc = RawDocumentRef::new(&docbytes).unwrap(); + let subdoc = rawdoc + .get("outer") + .expect("get doc result") + .expect("get doc option") + .as_document() + .expect("as doc"); assert_eq!( - rawdoc - .get("outer") - .expect("get doc result") - .expect("get doc option") - .as_document() - .expect("as doc") + subdoc .get("inner") .expect("get str result") .expect("get str option") @@ -56,6 +58,16 @@ fn nested_document() { .expect("as str"), "surprise", ); + + assert_eq!( + subdoc + .get("double") + .expect("get double result") + .expect("get double option") + .as_f64() + .expect("as f64 result"), + 5.5 + ); } #[test] From 4e48b01ca918587a312298c241a0d59f98dce431 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 11 Oct 2021 20:08:53 -0400 Subject: [PATCH 28/48] implement dbpointer, cleanup --- src/raw/array.rs | 6 +--- src/raw/doc.rs | 12 +++++--- src/raw/elem.rs | 80 ++++++++++-------------------------------------- src/raw/mod.rs | 17 +--------- 4 files changed, 26 insertions(+), 89 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index 07f5ccde..e33022a9 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -1,6 +1,6 @@ use std::convert::TryFrom; -use super::{Error, Iter, RawBinary, RawBson, RawDocumentRef, RawRegex, RawTimestamp, Result}; +use super::{Error, Iter, RawBinary, RawBson, RawDocumentRef, RawRegex, Result}; use crate::{oid::ObjectId, Bson, DateTime, Timestamp}; /// A BSON array referencing raw bytes stored elsewhere. 
@@ -10,10 +10,6 @@ pub struct RawArray { } impl RawArray { - pub(super) fn new(data: &[u8]) -> Result<&RawArray> { - Ok(RawArray::from_doc(RawDocumentRef::new(data)?)) - } - pub(crate) fn from_doc(doc: &RawDocumentRef) -> &RawArray { // SAFETY: // diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 2d9330aa..36f0134c 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -7,6 +7,7 @@ use std::{ use crate::{ de::read_bool, raw::{ + elem::RawDbPointer, error::{try_with_key, ErrorKind}, f64_from_slice, i64_from_slice, @@ -27,7 +28,6 @@ use super::{ RawBinary, RawBson, RawRegex, - RawTimestamp, Result, }; #[cfg(feature = "decimal128")] @@ -940,10 +940,12 @@ impl<'a> Iterator for Iter<'a> { ) } ElementType::DbPointer => { - let ns = read_lenencoded(&self.doc.data[valueoffset..])?; - let oid = self.next_oid(valueoffset + 4 + ns.len() + 1)?; - // TODO: fix this - (RawBson::DbPointer(0), 4 + ns.len() + 1 + 12) + let namespace = read_lenencoded(&self.doc.data[valueoffset..])?; + let id = self.next_oid(valueoffset + 4 + namespace.len() + 1)?; + ( + RawBson::DbPointer(RawDbPointer { namespace, id }), + 4 + namespace.len() + 1 + 12, + ) } ElementType::Symbol => { let s = read_lenencoded(&self.doc.data[valueoffset..])?; diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 56b1cd20..ac00af4c 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -1,26 +1,14 @@ use std::convert::{TryFrom, TryInto}; -// use chrono::{DateTime, TimeZone, Utc}; -use crate::{de::read_bool, oid, DateTime, DbPointer, Decimal128, Document, Timestamp}; - -#[cfg(feature = "decimal128")] -use super::d128_from_slice; -use super::{ - error::ErrorKind, - i32_from_slice, - i64_from_slice, - read_lenencoded, - read_nullterminated, - u32_from_slice, - Error, - RawArray, - RawDocumentRef, - Result, -}; +use super::{Error, RawArray, RawDocumentRef, Result}; use crate::{ - oid::ObjectId, + oid::{self, ObjectId}, spec::{BinarySubtype, ElementType}, Bson, + DbPointer, + Decimal128, + Document, + Timestamp, }; /// A BSON 
value referencing raw bytes stored elsewhere. @@ -66,9 +54,8 @@ pub enum RawBson<'a> { MaxKey, /// Min key MinKey, - // TODO: this /// DBPointer (Deprecated) - DbPointer(u8), + DbPointer(RawDbPointer<'a>), } // #[derive(Clone, Copy, Debug)] @@ -406,7 +393,6 @@ impl<'a> RawBson<'a> { // } // } -// TODO: finish implementation impl<'a> TryFrom> for Bson { type Error = Error; @@ -442,7 +428,10 @@ impl<'a> TryFrom> for Bson { RawBson::Timestamp(rawbson) => Bson::Timestamp(rawbson), RawBson::Int64(rawbson) => Bson::Int64(rawbson), RawBson::Undefined => Bson::Undefined, - RawBson::DbPointer(rawbson) => todo!("TODO finish this"), + RawBson::DbPointer(rawbson) => Bson::DbPointer(DbPointer { + namespace: rawbson.namespace.to_string(), + id: rawbson.id, + }), RawBson::Symbol(rawbson) => Bson::Symbol(rawbson.to_string()), RawBson::JavaScriptCodeWithScope(rawbson) => { Bson::JavaScriptCodeWithScope(crate::JavaScriptCodeWithScope { @@ -465,10 +454,6 @@ pub struct RawBinary<'a> { } impl<'a> RawBinary<'a> { - fn new(subtype: BinarySubtype, data: &'a [u8]) -> RawBinary<'a> { - RawBinary { subtype, data } - } - /// Gets the subtype of the binary value. pub fn subtype(self) -> BinarySubtype { self.subtype @@ -488,21 +473,6 @@ pub struct RawRegex<'a> { } impl<'a> RawRegex<'a> { - pub(super) fn new(data: &'a [u8]) -> Result> { - let pattern = read_nullterminated(data)?; - let opts = read_nullterminated(&data[pattern.len() + 1..])?; - if pattern.len() + opts.len() == data.len() - 2 { - Ok(RawRegex { - pattern, - options: opts, - }) - } else { - Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "expected two null-terminated strings".into(), - })) - } - } - /// Gets the pattern portion of the regex. pub fn pattern(self) -> &'a str { self.pattern @@ -514,28 +484,6 @@ impl<'a> RawRegex<'a> { } } -/// A BSON timestamp referencing raw bytes stored elsewhere. 
-#[derive(Clone, Copy, Debug, PartialEq)] -pub struct RawTimestamp<'a> { - data: &'a [u8], -} - -impl<'a> RawTimestamp<'a> { - /// Gets the time portion of the timestamp. - pub fn time(&self) -> u32 { - // RawBsonTimestamp can only be constructed with the correct data length, so this should - // always succeed. - u32_from_slice(&self.data[4..8]).unwrap() - } - - /// Gets the increment portion of the timestamp. - pub fn increment(&self) -> u32 { - // RawBsonTimestamp can only be constructed with the correct data length, so this should - // always succeed. - u32_from_slice(&self.data[0..4]).unwrap() - } -} - /// A BSON "code with scope" value referencing raw bytes stored elsewhere. #[derive(Clone, Copy, Debug)] pub struct RawJavaScriptCodeWithScope<'a> { @@ -554,3 +502,9 @@ impl<'a> RawJavaScriptCodeWithScope<'a> { self.scope } } + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct RawDbPointer<'a> { + pub(crate) namespace: &'a str, + pub(crate) id: ObjectId, +} diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 91755a45..040cd8b3 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -112,7 +112,7 @@ use std::convert::TryInto; pub use self::{ array::{RawArray, RawArrayIter}, doc::{Iter, RawDocument, RawDocumentRef}, - elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex, RawTimestamp}, + elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex}, error::{Error, ErrorKind, Result}, }; @@ -161,21 +161,6 @@ fn i64_from_slice(val: &[u8]) -> Result { Ok(i64::from_le_bytes(arr)) } -/// Given a 4 byte u8 slice, return a u32 calculated from the first 4 bytes in -/// little endian order. 
-fn u32_from_slice(val: &[u8]) -> Result { - let arr = val - .get(0..4) - .and_then(|s| s.try_into().ok()) - .ok_or_else(|| Error { - key: None, - kind: ErrorKind::MalformedValue { - message: format!("expected 4 bytes to read u32, instead got {}", val.len()), - }, - })?; - Ok(u32::from_le_bytes(arr)) -} - fn read_nullterminated(buf: &[u8]) -> Result<&str> { let mut splits = buf.splitn(2, |x| *x == 0); let value = splits.next().ok_or_else(|| Error { From be6b9fe5c704d9df5293fc9a79ef08393fe8ea61 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 11 Oct 2021 20:44:15 -0400 Subject: [PATCH 29/48] corpus wip --- src/bson.rs | 20 +++++++++++------ src/extjson/models.rs | 9 +------- src/raw/elem.rs | 17 +++++++++------ src/raw/test/mod.rs | 2 +- src/tests/spec/corpus.rs | 46 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 70 insertions(+), 24 deletions(-) diff --git a/src/bson.rs b/src/bson.rs index 6582bb7b..e52ef744 100644 --- a/src/bson.rs +++ b/src/bson.rs @@ -730,13 +730,10 @@ impl Bson { if let Ok(regex) = doc.get_document("$regularExpression") { if let Ok(pattern) = regex.get_str("pattern") { if let Ok(options) = regex.get_str("options") { - let mut options: Vec<_> = options.chars().collect(); - options.sort_unstable(); - - return Bson::RegularExpression(Regex { - pattern: pattern.into(), - options: options.into_iter().collect(), - }); + return Bson::RegularExpression(Regex::new( + pattern.into(), + options.into(), + )); } } } @@ -1014,6 +1011,15 @@ pub struct Regex { pub options: String, } +impl Regex { + pub(crate) fn new(pattern: String, options: String) -> Self { + let mut chars: Vec<_> = options.chars().collect(); + chars.sort_unstable(); + let options: String = chars.into_iter().collect(); + Self { pattern, options } + } +} + impl Display for Regex { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { write!(fmt, "/{}/{}", self.pattern, self.options) diff --git a/src/extjson/models.rs b/src/extjson/models.rs index 41851f21..0b98f0f3 100644 
--- a/src/extjson/models.rs +++ b/src/extjson/models.rs @@ -116,14 +116,7 @@ pub(crate) struct RegexBody { impl Regex { pub(crate) fn parse(self) -> crate::Regex { - let mut chars: Vec<_> = self.body.options.chars().collect(); - chars.sort_unstable(); - let options: String = chars.into_iter().collect(); - - crate::Regex { - pattern: self.body.pattern, - options, - } + crate::Regex::new(self.body.pattern, self.body.options) } } diff --git a/src/raw/elem.rs b/src/raw/elem.rs index ac00af4c..7a38a341 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -7,7 +7,6 @@ use crate::{ Bson, DbPointer, Decimal128, - Document, Timestamp, }; @@ -405,8 +404,12 @@ impl<'a> TryFrom> for Bson { Bson::Document(doc) } RawBson::Array(rawarray) => { - let doc: Document = rawarray.doc.try_into()?; - Bson::Array(doc.into_iter().map(|(_k, v)| v).collect()) + let mut items = Vec::new(); + for v in rawarray { + let bson: Bson = v?.try_into()?; + items.push(bson); + } + Bson::Array(items) } RawBson::Binary(rawbson) => { let RawBinary { subtype, data } = rawbson; @@ -419,10 +422,10 @@ impl<'a> TryFrom> for Bson { RawBson::Boolean(rawbson) => Bson::Boolean(rawbson), RawBson::DateTime(rawbson) => Bson::DateTime(rawbson), RawBson::Null => Bson::Null, - RawBson::RegularExpression(rawregex) => Bson::RegularExpression(crate::Regex { - pattern: String::from(rawregex.pattern()), - options: String::from(rawregex.options()), - }), + RawBson::RegularExpression(rawregex) => Bson::RegularExpression(crate::Regex::new( + rawregex.pattern.to_string(), + rawregex.options.to_string(), + )), RawBson::JavaScriptCode(rawbson) => Bson::JavaScriptCode(rawbson.to_string()), RawBson::Int32(rawbson) => Bson::Int32(rawbson), RawBson::Timestamp(rawbson) => Bson::Timestamp(rawbson), diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index 25de2a9b..b5e7eb4c 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -4,7 +4,7 @@ use super::*; use crate::{ doc, oid::ObjectId, - spec::{BinarySubtype, ElementType}, + 
spec::BinarySubtype, Binary, Bson, DateTime, diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index 330a091f..25536539 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -3,7 +3,7 @@ use std::{ str::FromStr, }; -use crate::{tests::LOCK, Bson, Document}; +use crate::{raw::RawDocumentRef, tests::LOCK, Bson, Document}; use pretty_assertions::assert_eq; use serde::Deserialize; @@ -79,6 +79,11 @@ fn run_test(test: TestFile) { let todocument_documentfromreader_cb: Document = crate::to_document(&documentfromreader_cb).expect(&description); + let document_from_raw_document: Document = RawDocumentRef::new(canonical_bson.as_slice()) + .expect(&description) + .try_into() + .expect(&description); + // These cover the ways to serialize those `Documents` back to BSON. let mut documenttowriter_documentfromreader_cb = Vec::new(); documentfromreader_cb @@ -103,6 +108,11 @@ fn run_test(test: TestFile) { let tovec_documentfromreader_cb = crate::to_vec(&documentfromreader_cb).expect(&description); + let mut documenttowriter_document_from_raw_document = Vec::new(); + document_from_raw_document + .to_writer(&mut documenttowriter_document_from_raw_document) + .expect(&description); + // native_to_bson( bson_to_native(cB) ) = cB // now we ensure the hex for all 5 are equivalent to the canonical BSON provided by the @@ -142,6 +152,13 @@ fn run_test(test: TestFile) { description, ); + assert_eq!( + hex::encode(documenttowriter_document_from_raw_document).to_lowercase(), + valid.canonical_bson.to_lowercase(), + "{}", + description, + ); + // NaN == NaN is false, so we skip document comparisons that contain NaN if !description.to_ascii_lowercase().contains("nan") && !description.contains("decq541") { assert_eq!(documentfromreader_cb, fromreader_cb, "{}", description); @@ -157,6 +174,12 @@ fn run_test(test: TestFile) { "{}", description ); + + assert_eq!( + document_from_raw_document, documentfromreader_cb, + "{}", + description + ); } // native_to_bson( 
bson_to_native(dB) ) = cB @@ -189,6 +212,21 @@ fn run_test(test: TestFile) { description, ); + let document_from_raw_document: Document = RawDocumentRef::new(db.as_slice()) + .expect(&description) + .try_into() + .expect(&description); + let mut documenttowriter_document_from_raw_document = Vec::new(); + document_from_raw_document + .to_writer(&mut documenttowriter_document_from_raw_document) + .expect(&description); + assert_eq!( + hex::encode(documenttowriter_document_from_raw_document).to_lowercase(), + valid.canonical_bson.to_lowercase(), + "{}", + description, + ); + // NaN == NaN is false, so we skip document comparisons that contain NaN if !description.contains("NaN") { assert_eq!( @@ -196,6 +234,12 @@ fn run_test(test: TestFile) { "{}", description ); + + assert_eq!( + document_from_raw_document, documentfromreader_cb, + "{}", + description + ); } } From bba68edff42d1449ef604347c3f63deefa8f3c4b Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Tue, 12 Oct 2021 15:29:21 -0400 Subject: [PATCH 30/48] finish corpus decode errors --- src/raw/doc.rs | 116 ++++++++++++++++++++++----------------- src/raw/mod.rs | 45 +++++++++++++-- src/tests/spec/corpus.rs | 4 ++ 3 files changed, 110 insertions(+), 55 deletions(-) diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 36f0134c..67527fb3 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -1,12 +1,13 @@ use std::{ borrow::{Borrow, Cow}, convert::{TryFrom, TryInto}, - ops::{Deref, Range}, + ops::Deref, }; use crate::{ - de::read_bool, + de::{read_bool, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE}, raw::{ + checked_add, elem::RawDbPointer, error::{try_with_key, ErrorKind}, f64_from_slice, @@ -209,6 +210,7 @@ impl<'a> IntoIterator for &'a RawDocument { Iter { doc: &self, offset: 4, + valid: true, } } } @@ -307,7 +309,6 @@ impl RawDocumentRef { } let length = i32_from_slice(&data)?; - println!("got length {}", length); if data.len() as i32 != length { return Err(Error { @@ -717,6 +718,7 @@ impl<'a> IntoIterator for 
&'a RawDocumentRef { Iter { doc: self, offset: 4, + valid: true, } } } @@ -725,37 +727,21 @@ impl<'a> IntoIterator for &'a RawDocumentRef { pub struct Iter<'a> { doc: &'a RawDocumentRef, offset: usize, + + /// Whether the underlying doc is assumed to be valid or if an error has been encountered. + /// After an error, all subsequent iterations will return None. + valid: bool, } impl<'a> Iter<'a> { - fn verify_null_terminated(&self, range: Range) -> Result<()> { - if range.is_empty() { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "value has empty range".to_string(), - })); - } - - self.verify_in_range(range.clone())?; - if self.doc.data[range.end - 1] == 0 { - return Ok(()); - } else { - return Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "not null terminated".into(), - }, - }); - } - } - - fn verify_in_range(&self, range: Range) -> Result<()> { - let start = range.start; - let len = range.len(); + fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> { + let end = checked_add(start, num_bytes)?; + let range = start..end; if self.doc.data.get(range).is_none() { return Err(Error::new_without_key(ErrorKind::MalformedValue { message: format!( "length exceeds remaining length of buffer: {} vs {}", - len, + num_bytes, self.doc.data.len() - start ), })); @@ -764,7 +750,7 @@ impl<'a> Iter<'a> { } fn next_oid(&self, starting_at: usize) -> Result { - self.verify_in_range(starting_at..(starting_at + 12))?; + self.verify_enough_bytes(starting_at, 12)?; let oid = ObjectId::from_bytes( self.doc.data[starting_at..(starting_at + 12)] .try_into() @@ -775,9 +761,25 @@ impl<'a> Iter<'a> { fn next_document(&self, starting_at: usize) -> Result<&'a RawDocumentRef> { let size = i32_from_slice(&self.doc.data[starting_at..])? 
as usize; - let range = starting_at..(starting_at + size); - self.verify_null_terminated(range.clone())?; - RawDocumentRef::new(&self.doc.data[range]) + + if size < MIN_BSON_DOCUMENT_SIZE as usize { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: format!("document too small: {} bytes", size), + })); + } + + self.verify_enough_bytes(starting_at, size)?; + let end = starting_at + size; + + if self.doc.data[end - 1] != 0 { + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "not null terminated".into(), + }, + }); + } + RawDocumentRef::new(&self.doc.data[starting_at..end]) } } @@ -785,11 +787,14 @@ impl<'a> Iterator for Iter<'a> { type Item = Result<(&'a str, RawBson<'a>)>; fn next(&mut self) -> Option)>> { - if self.offset == self.doc.data.len() - 1 { + if !self.valid { + return None; + } else if self.offset == self.doc.data.len() - 1 { if self.doc.data[self.offset] == 0 { // end of document marker return None; } else { + self.valid = false; return Some(Err(Error { key: None, kind: ErrorKind::MalformedValue { @@ -798,17 +803,20 @@ impl<'a> Iterator for Iter<'a> { })); } } else if self.offset >= self.doc.data.len() { - // return None on subsequent iterations after an error - return None; + self.valid = false; + return Some(Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "iteration overflowed document".to_string(), + }))); } let key = match read_nullterminated(&self.doc.data[self.offset + 1..]) { Ok(k) => k, - Err(e) => return Some(Err(e)), + Err(e) => { + self.valid = false; + return Some(Err(e)); + } }; - println!("iterating {}", key); - let kvp_result = try_with_key(key, || { let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 @@ -824,8 +832,6 @@ impl<'a> Iterator for Iter<'a> { } }; - println!("et: {:?}", element_type); - let (element, element_size) = match element_type { ElementType::Int32 => { let i = i32_from_slice(&self.doc.data[valueoffset..])?; @@ -857,7 
+863,7 @@ impl<'a> Iterator for Iter<'a> { ElementType::Binary => { let len = i32_from_slice(&self.doc.data[valueoffset..])? as usize; let data_start = valueoffset + 4 + 1; - self.verify_in_range(valueoffset..(data_start + len))?; + self.verify_enough_bytes(valueoffset, len)?; let subtype = BinarySubtype::from(self.doc.data[valueoffset + 4]); let data = match subtype { BinarySubtype::BinaryOld => { @@ -868,7 +874,7 @@ impl<'a> Iterator for Iter<'a> { })); } let oldlength = i32_from_slice(&self.doc.data[data_start..])? as usize; - if oldlength + 4 != len { + if checked_add(oldlength, 4)? != len { return Err(Error::new_without_key(ErrorKind::MalformedValue { message: "old binary subtype has wrong inner declared length" .into(), @@ -925,12 +931,18 @@ impl<'a> Iterator for Iter<'a> { } ElementType::JavaScriptCodeWithScope => { let length = i32_from_slice(&self.doc.data[valueoffset..])? as usize; - self.verify_in_range(valueoffset..(valueoffset + length))?; - let code = read_lenencoded(&self.doc.data[valueoffset + 4..])?; - let scope_start = valueoffset + 4 + 4 + code.len() + 1; - let scope = - RawDocumentRef::new(&self.doc.data[scope_start..(valueoffset + length)])?; + if length < MIN_CODE_WITH_SCOPE_SIZE as usize { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "code with scope length too small".to_string(), + })); + } + + self.verify_enough_bytes(valueoffset, length)?; + let slice = &self.doc.data[valueoffset..(valueoffset + length)]; + let code = read_lenencoded(&slice[4..])?; + let scope_start = 4 + 4 + code.len() + 1; + let scope = RawDocumentRef::new(&slice[scope_start..])?; ( RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope { code, @@ -952,7 +964,7 @@ impl<'a> Iterator for Iter<'a> { (RawBson::Symbol(s), 4 + s.len() + 1) } ElementType::Decimal128 => { - self.verify_in_range(valueoffset..(valueoffset + 16))?; + self.verify_enough_bytes(valueoffset, 16)?; ( RawBson::Decimal128(Decimal128::from_bytes( 
self.doc.data[valueoffset..(valueoffset + 16)] @@ -966,14 +978,16 @@ impl<'a> Iterator for Iter<'a> { ElementType::MaxKey => (RawBson::MaxKey, 0), }; - let nextoffset = valueoffset + element_size; - self.offset = nextoffset; - - self.verify_in_range(valueoffset..nextoffset)?; + self.offset = valueoffset + element_size; + self.verify_enough_bytes(valueoffset, element_size)?; Ok((key, element)) }); + if kvp_result.is_err() { + self.valid = false; + } + Some(kvp_result) } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 040cd8b3..78a1b5cc 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -107,7 +107,9 @@ mod error; #[cfg(test)] mod test; -use std::convert::TryInto; +use std::convert::{TryFrom, TryInto}; + +use crate::de::MIN_BSON_STRING_SIZE; pub use self::{ array::{RawArray, RawArrayIter}, @@ -183,19 +185,38 @@ fn read_nullterminated(buf: &[u8]) -> Result<&str> { fn read_lenencoded(buf: &[u8]) -> Result<&str> { let length = i32_from_slice(&buf[..4])?; - if (buf.len() as i32) < length + 4 { + let end = checked_add(usize_try_from_i32(length)?, 4)?; + + if end < MIN_BSON_STRING_SIZE as usize { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: format!( + "BSON length encoded string needs to be at least {} bytes, instead got {}", + MIN_BSON_STRING_SIZE, end + ), + })); + } + + if buf.len() < end { return Err(Error { key: None, kind: ErrorKind::MalformedValue { message: format!( "expected buffer to contain at least {} bytes, but it only has {}", - length + 4, + end, buf.len() ), }, }); } - try_to_str(&buf[4..4 + length as usize - 1]) + + if buf[end - 1] != 0 { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "expected string to be null-terminated".to_string(), + })); + } + + // exclude null byte + try_to_str(&buf[4..(end - 1)]) } fn try_to_str(data: &[u8]) -> Result<&str> { @@ -207,3 +228,19 @@ fn try_to_str(data: &[u8]) -> Result<&str> { }), } } + +fn usize_try_from_i32(i: i32) -> Result { + 
usize::try_from(i).map_err(|e| { + Error::new_without_key(ErrorKind::MalformedValue { + message: e.to_string(), + }) + }) +} + +fn checked_add(lhs: usize, rhs: usize) -> Result { + lhs.checked_add(rhs).ok_or_else(|| { + Error::new_without_key(ErrorKind::MalformedValue { + message: "attempted to add with overflow".to_string(), + }) + }) +} diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index 25536539..c1675bfc 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -411,6 +411,10 @@ fn run_test(test: TestFile) { Document::from_reader(bson.as_slice()).expect_err(&description); crate::from_reader::<_, Document>(bson.as_slice()).expect_err(description.as_str()); + if let Ok(doc) = RawDocumentRef::new(bson.as_slice()) { + Document::try_from(doc).expect_err(&description.as_str()); + } + if decode_error.description.contains("invalid UTF-8") { crate::from_reader_utf8_lossy::<_, Document>(bson.as_slice()).unwrap_or_else(|err| { panic!( From ddd120a27e5aa3cec769d6a79a7ca4b3a5a5abeb Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Tue, 12 Oct 2021 18:00:25 -0400 Subject: [PATCH 31/48] wip typed helpers --- src/raw/doc.rs | 445 +++++++++++++++++++++++++++---------------------- 1 file changed, 243 insertions(+), 202 deletions(-) diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 67527fb3..42d76867 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -387,217 +387,232 @@ impl RawDocumentRef { Ok(None) } - // fn get_with<'a, T>( - // &'a self, - // key: &str, - // f: impl FnOnce(RawBson<'a>) -> Result, - // ) -> Result> { - // self.get(key)?.map(f).transpose() - // } + fn get_with<'a, T>( + &'a self, + key: &str, + expected_type: ElementType, + f: impl FnOnce(RawBson<'a>) -> Option, + ) -> ValueAccessResult { + let bson = self.get(key)?.ok_or(ValueAccessError::NotPresent)?; + match f(bson) { + Some(t) => Ok(t), + None => Err(ValueAccessError::UnexpectedType { + expected: expected_type, + actual: bson.element_type(), + }), + } + } - // /// Gets a 
reference to the BSON double value corresponding to a given key or returns an - // error /// if the key corresponds to a value which isn't a double. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::raw::{ErrorKind, RawDocument}; - // /// use bson::doc; - // /// - // /// let doc = RawDocument::from_document(&doc! { - // /// "bool": true, - // /// "f64": 2.5, - // /// })?; - // /// - // /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); - // /// assert!(matches!(doc.get_f64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. - // })); /// assert_eq!(doc.get_f64("unknown"), Ok(None)); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_f64(&self, key: &str) -> Result> { - // self.get_with(key, RawBson::as_f64) - // } + /// Gets a reference to the BSON double value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a double. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::raw::{ErrorKind, RawDocument}; + /// use bson::doc; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "bool": true, + /// "f64": 2.5, + /// })?; + /// + /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); + /// assert!(matches!(doc.get_f64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); + /// assert_eq!(doc.get_f64("unknown"), Ok(None)); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_f64(&self, key: &str) -> ValueAccessResult { + match self.get(key)? { + Some(RawBson::Double(f)) => Ok(f), + Some(bson) => Err(ValueAccessError::UnexpectedType { + expected: ElementType::Double, + actual: bson.element_type(), + }), + None => Err(ValueAccessError::NotPresent), + } + } - // /// Gets a reference to the string value corresponding to a given key or returns an error if - // the /// key corresponds to a value which isn't a string. 
- // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{doc, raw::{RawDocument, ErrorKind}}; - // /// - // /// let doc = RawDocument::from_document(&doc! { - // /// "string": "hello", - // /// "bool": true, - // /// })?; - // /// - // /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); - // /// assert!(matches!(doc.get_str("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. - // })); /// assert_eq!(doc.get_str("unknown"), Ok(None)); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_str<'a>(&'a self, key: &str) -> Result> { - // self.get_with(key, RawBson::as_str) - // } + /// Gets a reference to the string value corresponding to a given key or returns an error if the + /// key corresponds to a value which isn't a string. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{RawDocument, ErrorKind}}; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "string": "hello", + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); + /// assert!(matches!(doc.get_str("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); + /// assert_eq!(doc.get_str("unknown"), Ok(None)); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_str<'a>(&'a self, key: &str) -> ValueAccessResult<&'a str> { + self.get_with(key, ElementType::String, RawBson::as_str) + } - // /// Gets a reference to the document value corresponding to a given key or returns an error - // if /// the key corresponds to a value which isn't a document. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{doc, raw::{ErrorKind, RawDocument}}; - // /// - // /// let doc = RawDocument::from_document(&doc! 
{ - // /// "doc": { "key": "value"}, - // /// "bool": true, - // /// })?; - // /// - // /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), - // Ok(Some("value"))); /// assert!(matches!(doc.get_document("bool").unwrap_err().kind, - // ErrorKind::UnexpectedType { .. })); /// assert!(doc.get_document("unknown")?.is_none()); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_document<'a>(&'a self, key: &str) -> Result> { - // self.get_with(key, RawBson::as_document) - // } + /// Gets a reference to the document value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a document. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{ErrorKind, RawDocument}}; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "doc": { "key": "value"}, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); + /// assert!(matches!(doc.get_document("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); + /// assert!(doc.get_document("unknown")?.is_none()); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_document<'a>(&'a self, key: &str) -> ValueAccessResult<&'a RawDocumentRef> { + self.get_with(key, ElementType::EmbeddedDocument, RawBson::as_document) + } - // /// Gets a reference to the array value corresponding to a given key or returns an error if - // /// the key corresponds to a value which isn't an array. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{doc, raw::RawDocument}; - // /// - // /// let doc = RawDocument::from_document(&doc! 
{ - // /// "array": [true, 3], - // /// "bool": true, - // /// })?; - // /// - // /// let mut arr_iter = doc.get_array("array")?.expect("finding key array").into_iter(); - // /// let _: bool = arr_iter.next().unwrap()?.as_bool()?; - // /// let _: i32 = arr_iter.next().unwrap()?.as_i32()?; - // /// - // /// assert!(arr_iter.next().is_none()); - // /// assert!(doc.get_array("bool").is_err()); - // /// assert!(doc.get_array("unknown")?.is_none()); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_array<'a>(&'a self, key: &str) -> Result> { - // self.get_with(key, RawBson::as_array) - // } + /// Gets a reference to the array value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't an array. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocument}; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "array": [true, 3], + /// "bool": true, + /// })?; + /// + /// let mut arr_iter = doc.get_array("array")?.expect("finding key array").into_iter(); + /// let _: bool = arr_iter.next().unwrap()?.as_bool()?; + /// let _: i32 = arr_iter.next().unwrap()?.as_i32()?; + /// + /// assert!(arr_iter.next().is_none()); + /// assert!(doc.get_array("bool").is_err()); + /// assert!(doc.get_array("unknown")?.is_none()); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_array<'a>(&'a self, key: &str) -> ValueAccessResult<&'a RawArray> { + self.get_with(key, ElementType::Array, RawBson::as_array) + } - // /// Gets a reference to the BSON binary value corresponding to a given key or returns an - // error /// if the key corresponds to a value which isn't a binary value. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{ - // /// doc, - // /// raw::{ErrorKind, RawDocument, RawBinary}, - // /// spec::BinarySubtype, - // /// Binary, - // /// }; - // /// - // /// let doc = RawDocument::from_document(&doc! 
{ - // /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, - // /// "bool": true, - // /// })?; - // /// - // /// assert_eq!(doc.get_binary("binary")?.map(RawBinary::as_bytes), Some(&[1, 2, 3][..])); - // /// assert!(matches!(doc.get_binary("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. - // })); /// assert!(doc.get_binary("unknown")?.is_none()); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_binary<'a>(&'a self, key: &str) -> Result>> { - // self.get_with(key, RawBson::as_binary) - // } + /// Gets a reference to the BSON binary value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a binary value. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{ + /// doc, + /// raw::{ErrorKind, RawDocument, RawBinary}, + /// spec::BinarySubtype, + /// Binary, + /// }; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_binary("binary")?.map(RawBinary::as_bytes), Some(&[1, 2, 3][..])); + /// assert!(matches!(doc.get_binary("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); + /// assert!(doc.get_binary("unknown")?.is_none()); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_binary<'a>(&'a self, key: &str) -> ValueAccessResult> { + self.get_with(key, ElementType::Binary, RawBson::as_binary) + } - // /// Gets a reference to the ObjectId value corresponding to a given key or returns an error - // if /// the key corresponds to a value which isn't an ObjectId. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{doc, oid::ObjectId, raw::{ErrorKind, RawDocument}}; - // /// - // /// let doc = RawDocument::from_document(&doc! 
{ - // /// "_id": ObjectId::new(), - // /// "bool": true, - // /// })?; - // /// - // /// let oid = doc.get_object_id("_id")?.unwrap(); - // /// assert!(matches!(doc.get_object_id("bool").unwrap_err().kind, ErrorKind::UnexpectedType { - // .. })); /// assert!(doc.get_object_id("unknown")?.is_none()); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_object_id(&self, key: &str) -> Result> { - // self.get_with(key, RawBson::as_object_id) - // } + /// Gets a reference to the ObjectId value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't an ObjectId. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::{ErrorKind, RawDocument}}; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "_id": ObjectId::new(), + /// "bool": true, + /// })?; + /// + /// let oid = doc.get_object_id("_id")?.unwrap(); + /// assert!(matches!(doc.get_object_id("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); + /// assert!(doc.get_object_id("unknown")?.is_none()); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_object_id(&self, key: &str) -> ValueAccessResult { + self.get_with(key, ElementType::ObjectId, RawBson::as_object_id) + } - // /// Gets a reference to the boolean value corresponding to a given key or returns an error if - // /// the key corresponds to a value which isn't a boolean. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{doc, oid::ObjectId, raw::{RawDocument, ErrorKind}}; - // /// - // /// let doc = RawDocument::from_document(&doc! { - // /// "_id": ObjectId::new(), - // /// "bool": true, - // /// })?; - // /// - // /// assert!(doc.get_bool("bool")?.unwrap()); - // /// assert!(matches!(doc.get_bool("_id").unwrap_err().kind, ErrorKind::UnexpectedType { .. 
- // })); /// assert!(doc.get_object_id("unknown")?.is_none()); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_bool(&self, key: &str) -> Result> { - // self.get_with(key, RawBson::as_bool) - // } + /// Gets a reference to the boolean value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a boolean. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::{RawDocument, ErrorKind}}; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "_id": ObjectId::new(), + /// "bool": true, + /// })?; + /// + /// assert!(doc.get_bool("bool")?.unwrap()); + /// assert!(matches!(doc.get_bool("_id").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); + /// assert!(doc.get_object_id("unknown")?.is_none()); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_bool(&self, key: &str) -> ValueAccessResult { + self.get_with(key, ElementType::Boolean, RawBson::as_bool) + } - // /// Gets a reference to the BSON DateTime value corresponding to a given key or returns an - // error /// if the key corresponds to a value which isn't a DateTime. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{doc, raw::{ErrorKind, RawDocument}, DateTime}; - // /// - // /// let dt = DateTime::now(); - // /// let doc = RawDocument::from_document(&doc! { - // /// "created_at": dt, - // /// "bool": true, - // /// })?; - // /// - // /// assert_eq!(doc.get_datetime("created_at")?, Some(dt)); - // /// assert!(matches!(doc.get_datetime("bool").unwrap_err().kind, ErrorKind::UnexpectedType { - // .. })); /// assert!(doc.get_datetime("unknown")?.is_none()); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_datetime(&self, key: &str) -> Result> { - // self.get_with(key, RawBson::as_datetime) - // } + /// Gets a reference to the BSON DateTime value corresponding to a given key or returns an + /// error if the key corresponds to a value which isn't a DateTime. 
+ /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{ErrorKind, RawDocument}, DateTime}; + /// + /// let dt = DateTime::now(); + /// let doc = RawDocument::from_document(&doc! { + /// "created_at": dt, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_datetime("created_at")?, Some(dt)); + /// assert!(matches!(doc.get_datetime("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); + /// assert!(doc.get_datetime("unknown")?.is_none()); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_datetime(&self, key: &str) -> ValueAccessResult { + self.get_with(key, ElementType::DateTime, RawBson::as_datetime) + } - // /// Gets a reference to the BSON regex value corresponding to a given key or returns an error - // if /// the key corresponds to a value which isn't a regex. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{doc, Regex, raw::{RawDocument, ErrorKind}}; - // /// - // /// let doc = RawDocument::from_document(&doc! { - // /// "regex": Regex { - // /// pattern: r"end\s*$".into(), - // /// options: "i".into(), - // /// }, - // /// "bool": true, - // /// })?; - // /// - // /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); - // /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); - // /// assert!(matches!(doc.get_regex("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. - // })); /// assert!(doc.get_regex("unknown")?.is_none()); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_regex<'a>(&'a self, key: &str) -> Result>> { - // self.get_with(key, RawBson::as_regex) - // } + /// Gets a reference to the BSON regex value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a regex. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, Regex, raw::{RawDocument, ErrorKind}}; + /// + /// let doc = RawDocument::from_document(&doc! 
{ + /// "regex": Regex { + /// pattern: r"end\s*$".into(), + /// options: "i".into(), + /// }, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); + /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); + /// assert!(matches!(doc.get_regex("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); + /// assert!(doc.get_regex("unknown")?.is_none()); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_regex<'a>(&'a self, key: &str) -> ValueAccessResult> { + self.get_with(key, ElementType::RegularExpression, RawBson::as_regex) + } // /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an // /// error if the key corresponds to a value which isn't a timestamp. @@ -991,3 +1006,29 @@ impl<'a> Iterator for Iter<'a> { Some(kvp_result) } } + +type ValueAccessResult = std::result::Result; + +/// Error to indicate that either a value was empty or it contained an unexpected +/// type, for use with the direct getters. +#[derive(PartialEq, Clone)] +#[non_exhaustive] +pub enum ValueAccessError { + /// Cannot find the expected field with the specified key + NotPresent, + + /// Found a Bson value with the specified key, but not with the expected type + UnexpectedType { + expected: ElementType, + actual: ElementType, + }, + + /// An error was encountered attempting to decode the document. 
+ InvalidBson(super::Error), +} + +impl From for ValueAccessError { + fn from(e: super::Error) -> Self { + ValueAccessError::InvalidBson(e) + } +} From e59b0a7fd234703b4b8a150f23703ac8f977a7ed Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 16:02:47 -0400 Subject: [PATCH 32/48] finish typed helpers, add key to ValueAccessError, various cleanup --- src/raw/array.rs | 85 +++++++++----- src/raw/doc.rs | 252 ++++++++++++++++++------------------------ src/raw/elem.rs | 282 +++++++++++------------------------------------ src/raw/error.rs | 50 +++++++++ 4 files changed, 280 insertions(+), 389 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index e33022a9..96f75ff4 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -1,7 +1,16 @@ use std::convert::TryFrom; -use super::{Error, Iter, RawBinary, RawBson, RawDocumentRef, RawRegex, Result}; -use crate::{oid::ObjectId, Bson, DateTime, Timestamp}; +use super::{ + error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, + Error, + Iter, + RawBinary, + RawBson, + RawDocumentRef, + RawRegex, + Result, +}; +use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// A BSON array referencing raw bytes stored elsewhere. #[repr(transparent)] @@ -31,81 +40,101 @@ impl RawArray { fn get_with<'a, T>( &'a self, index: usize, + expected_type: ElementType, f: impl FnOnce(RawBson<'a>) -> Option, - ) -> Result> { - Ok(self.get(index)?.and_then(f)) + ) -> ValueAccessResult { + let bson = self + .get(index) + .map_err(|e| ValueAccessError { + key: index.to_string(), + kind: ValueAccessErrorKind::InvalidBson(e), + })? 
+ .ok_or(ValueAccessError { + key: index.to_string(), + kind: ValueAccessErrorKind::NotPresent, + })?; + match f(bson) { + Some(t) => Ok(t), + None => Err(ValueAccessError { + key: index.to_string(), + kind: ValueAccessErrorKind::UnexpectedType { + expected: expected_type, + actual: bson.element_type(), + }, + }), + } } /// Gets the BSON double at the given index or returns an error if the value at that index isn't /// a double. - pub fn get_f64(&self, index: usize) -> Result> { - self.get_with(index, RawBson::as_f64) + pub fn get_f64(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::Double, RawBson::as_f64) } /// Gets a reference to the string at the given index or returns an error if the /// value at that index isn't a string. - pub fn get_str(&self, index: usize) -> Result> { - self.get_with(index, RawBson::as_str) + pub fn get_str(&self, index: usize) -> ValueAccessResult<&str> { + self.get_with(index, ElementType::String, RawBson::as_str) } /// Gets a reference to the document at the given index or returns an error if the /// value at that index isn't a document. - pub fn get_document(&self, index: usize) -> Result> { - self.get_with(index, RawBson::as_document) + pub fn get_document(&self, index: usize) -> ValueAccessResult<&RawDocumentRef> { + self.get_with(index, ElementType::EmbeddedDocument, RawBson::as_document) } /// Gets a reference to the array at the given index or returns an error if the /// value at that index isn't a array. - pub fn get_array(&self, index: usize) -> Result> { - self.get_with(index, RawBson::as_array) + pub fn get_array(&self, index: usize) -> ValueAccessResult<&RawArray> { + self.get_with(index, ElementType::Array, RawBson::as_array) } /// Gets a reference to the BSON binary value at the given index or returns an error if the /// value at that index isn't a binary. 
- pub fn get_binary(&self, index: usize) -> Result>> { - self.get_with(index, RawBson::as_binary) + pub fn get_binary(&self, index: usize) -> ValueAccessResult> { + self.get_with(index, ElementType::Binary, RawBson::as_binary) } /// Gets the ObjectId at the given index or returns an error if the value at that index isn't an /// ObjectId. - pub fn get_object_id(&self, index: usize) -> Result> { - self.get_with(index, RawBson::as_object_id) + pub fn get_object_id(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::ObjectId, RawBson::as_object_id) } /// Gets the boolean at the given index or returns an error if the value at that index isn't a /// boolean. - pub fn get_bool(&self, index: usize) -> Result> { - self.get_with(index, RawBson::as_bool) + pub fn get_bool(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::Boolean, RawBson::as_bool) } /// Gets the DateTime at the given index or returns an error if the value at that index isn't a /// DateTime. - pub fn get_datetime(&self, index: usize) -> Result> { - Ok(self.get_with(index, RawBson::as_datetime)?.map(Into::into)) + pub fn get_datetime(&self, index: usize) -> ValueAccessResult { + Ok(self.get_with(index, ElementType::DateTime, RawBson::as_datetime)?) } /// Gets a reference to the BSON regex at the given index or returns an error if the /// value at that index isn't a regex. - pub fn get_regex(&self, index: usize) -> Result>> { - self.get_with(index, RawBson::as_regex) + pub fn get_regex(&self, index: usize) -> ValueAccessResult> { + self.get_with(index, ElementType::RegularExpression, RawBson::as_regex) } /// Gets a reference to the BSON timestamp at the given index or returns an error if the /// value at that index isn't a timestamp. 
- pub fn get_timestamp(&self, index: usize) -> Result> { - self.get_with(index, RawBson::as_timestamp) + pub fn get_timestamp(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::Timestamp, RawBson::as_timestamp) } /// Gets the BSON int32 at the given index or returns an error if the value at that index isn't /// a 32-bit integer. - pub fn get_i32(&self, index: usize) -> Result> { - self.get_with(index, RawBson::as_i32) + pub fn get_i32(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::Int32, RawBson::as_i32) } /// Gets BSON int64 at the given index or returns an error if the value at that index isn't a /// 64-bit integer. - pub fn get_i64(&self, index: usize) -> Result> { - self.get_with(index, RawBson::as_i64) + pub fn get_i64(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::Int64, RawBson::as_i64) } /// Gets a reference to the raw bytes of the RawArray. diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 42d76867..4c41d102 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -21,6 +21,7 @@ use crate::{ }; use super::{ + error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, i32_from_slice, read_lenencoded, read_nullterminated, @@ -31,8 +32,6 @@ use super::{ RawRegex, Result, }; -#[cfg(feature = "decimal128")] -use crate::decimal128::Decimal128; use crate::{oid::ObjectId, spec::ElementType, Document}; /// A BSON document, stored as raw bytes on the heap. 
This can be created from a `Vec` or @@ -92,41 +91,22 @@ impl RawDocument { /// # Ok::<(), Error>(()) /// ``` pub fn new(data: Vec) -> Result { - if data.len() < 5 { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "document too short".into(), - })); - } - - let length = i32_from_slice(&data)?; - - if data.len() as i32 != length { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "document length incorrect".into(), - })); - } - - if data[data.len() - 1] != 0 { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "document not null-terminated".into(), - })); - } - + let _ = RawDocumentRef::new(data.as_slice())?; Ok(Self { data }) } /// Create a RawDocument from a Document. /// /// ``` - /// # use bson::raw::{RawDocument, Error}; - /// use bson::{doc, oid::ObjectId}; + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::RawDocument}; /// /// let document = doc! { /// "_id": ObjectId::new(), /// "name": "Herman Melville", /// "title": "Moby-Dick", /// }; - /// let doc = RawDocument::from_document(&document); + /// let doc = RawDocument::from_document(&document)?; /// # Ok::<(), Error>(()) /// ``` pub fn from_document(doc: &Document) -> Result { @@ -160,7 +140,7 @@ impl RawDocument { /// /// # Note: /// - /// There is no owning iterator for RawDocument. If you need ownership over + /// There is no owning iterator for RawDocument. If you need ownership over /// elements that might need to allocate, you must explicitly convert /// them to owned types yourself. 
pub fn iter(&self) -> Iter<'_> { @@ -178,7 +158,7 @@ impl RawDocument { /// # Ok::<(), Error>(()) /// ``` pub fn into_vec(self) -> Vec { - self.data.to_vec() + self.data } } @@ -377,10 +357,10 @@ impl RawDocumentRef { /// assert!(doc.get("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get<'a>(&'a self, key: &str) -> Result>> { + pub fn get<'a>(&'a self, key: impl AsRef) -> Result>> { for result in self.into_iter() { let (k, v) = result?; - if key == k { + if key.as_ref() == k { return Ok(Some(v)); } } @@ -389,16 +369,30 @@ impl RawDocumentRef { fn get_with<'a, T>( &'a self, - key: &str, + key: impl AsRef, expected_type: ElementType, f: impl FnOnce(RawBson<'a>) -> Option, ) -> ValueAccessResult { - let bson = self.get(key)?.ok_or(ValueAccessError::NotPresent)?; + let key = key.as_ref(); + + let bson = self + .get(key) + .map_err(|e| ValueAccessError { + key: key.to_string(), + kind: ValueAccessErrorKind::InvalidBson(e), + })? + .ok_or(ValueAccessError { + key: key.to_string(), + kind: ValueAccessErrorKind::NotPresent, + })?; match f(bson) { Some(t) => Ok(t), - None => Err(ValueAccessError::UnexpectedType { - expected: expected_type, - actual: bson.element_type(), + None => Err(ValueAccessError { + key: key.to_string(), + kind: ValueAccessErrorKind::UnexpectedType { + expected: expected_type, + actual: bson.element_type(), + }, }), } } @@ -421,15 +415,8 @@ impl RawDocumentRef { /// assert_eq!(doc.get_f64("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` - pub fn get_f64(&self, key: &str) -> ValueAccessResult { - match self.get(key)? 
{ - Some(RawBson::Double(f)) => Ok(f), - Some(bson) => Err(ValueAccessError::UnexpectedType { - expected: ElementType::Double, - actual: bson.element_type(), - }), - None => Err(ValueAccessError::NotPresent), - } + pub fn get_f64(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Double, RawBson::as_f64) } /// Gets a reference to the string value corresponding to a given key or returns an error if the @@ -449,7 +436,7 @@ impl RawDocumentRef { /// assert_eq!(doc.get_str("unknown"), Ok(None)); /// # Ok::<(), Error>(()) /// ``` - pub fn get_str<'a>(&'a self, key: &str) -> ValueAccessResult<&'a str> { + pub fn get_str<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a str> { self.get_with(key, ElementType::String, RawBson::as_str) } @@ -470,7 +457,10 @@ impl RawDocumentRef { /// assert!(doc.get_document("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get_document<'a>(&'a self, key: &str) -> ValueAccessResult<&'a RawDocumentRef> { + pub fn get_document<'a>( + &'a self, + key: impl AsRef, + ) -> ValueAccessResult<&'a RawDocumentRef> { self.get_with(key, ElementType::EmbeddedDocument, RawBson::as_document) } @@ -495,7 +485,7 @@ impl RawDocumentRef { /// assert!(doc.get_array("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get_array<'a>(&'a self, key: &str) -> ValueAccessResult<&'a RawArray> { + pub fn get_array<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a RawArray> { self.get_with(key, ElementType::Array, RawBson::as_array) } @@ -521,7 +511,7 @@ impl RawDocumentRef { /// assert!(doc.get_binary("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get_binary<'a>(&'a self, key: &str) -> ValueAccessResult> { + pub fn get_binary<'a>(&'a self, key: impl AsRef) -> ValueAccessResult> { self.get_with(key, ElementType::Binary, RawBson::as_binary) } @@ -542,7 +532,7 @@ impl RawDocumentRef { /// assert!(doc.get_object_id("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn 
get_object_id(&self, key: &str) -> ValueAccessResult { + pub fn get_object_id(&self, key: impl AsRef) -> ValueAccessResult { self.get_with(key, ElementType::ObjectId, RawBson::as_object_id) } @@ -563,7 +553,7 @@ impl RawDocumentRef { /// assert!(doc.get_object_id("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get_bool(&self, key: &str) -> ValueAccessResult { + pub fn get_bool(&self, key: impl AsRef) -> ValueAccessResult { self.get_with(key, ElementType::Boolean, RawBson::as_bool) } @@ -585,7 +575,7 @@ impl RawDocumentRef { /// assert!(doc.get_datetime("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get_datetime(&self, key: &str) -> ValueAccessResult { + pub fn get_datetime(&self, key: impl AsRef) -> ValueAccessResult { self.get_with(key, ElementType::DateTime, RawBson::as_datetime) } @@ -610,75 +600,75 @@ impl RawDocumentRef { /// assert!(doc.get_regex("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get_regex<'a>(&'a self, key: &str) -> ValueAccessResult> { + pub fn get_regex<'a>(&'a self, key: impl AsRef) -> ValueAccessResult> { self.get_with(key, ElementType::RegularExpression, RawBson::as_regex) } - // /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an - // /// error if the key corresponds to a value which isn't a timestamp. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{doc, Timestamp, raw::{RawDocument, ErrorKind}}; - // /// - // /// let doc = RawDocument::from_document(&doc! { - // /// "bool": true, - // /// "ts": Timestamp { time: 649876543, increment: 9 }, - // /// })?; - // /// - // /// let timestamp = doc.get_timestamp("ts")?.unwrap(); - // /// - // /// assert_eq!(timestamp.time(), 649876543); - // /// assert_eq!(timestamp.increment(), 9); - // /// assert!(matches!(doc.get_timestamp("bool").unwrap_err().kind, ErrorKind::UnexpectedType { - // .. 
})); /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_timestamp<'a>(&'a self, key: &str) -> Result>> { - // self.get_with(key, RawBson::as_timestamp) - // } - - // /// Gets a reference to the BSON int32 value corresponding to a given key or returns an error - // if /// the key corresponds to a value which isn't a 32-bit integer. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{doc, raw::{RawDocument, ErrorKind}}; - // /// - // /// let doc = RawDocument::from_document(&doc! { - // /// "bool": true, - // /// "i32": 1_000_000, - // /// })?; - // /// - // /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); - // /// assert!(matches!(doc.get_i32("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. - // })); /// assert_eq!(doc.get_i32("unknown"), Ok(None)); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_i32(&self, key: &str) -> Result> { - // self.get_with(key, RawBson::as_i32) - // } - - // /// Gets a reference to the BSON int64 value corresponding to a given key or returns an error - // if /// the key corresponds to a value which isn't a 64-bit integer. - // /// - // /// ``` - // /// # use bson::raw::Error; - // /// use bson::{doc, raw::{ErrorKind, RawDocument}}; - // /// - // /// let doc = RawDocument::from_document(&doc! { - // /// "bool": true, - // /// "i64": 9223372036854775807_i64, - // /// })?; - // /// - // /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); - // /// assert!(matches!(doc.get_i64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. - // })); /// assert_eq!(doc.get_i64("unknown"), Ok(None)); - // /// # Ok::<(), Error>(()) - // /// ``` - // pub fn get_i64(&self, key: &str) -> Result> { - // self.get_with(key, RawBson::as_i64) - // } + /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an + /// error if the key corresponds to a value which isn't a timestamp. 
+ /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, Timestamp, raw::{RawDocument, ErrorKind}}; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "bool": true, + /// "ts": Timestamp { time: 649876543, increment: 9 }, + /// })?; + /// + /// let timestamp = doc.get_timestamp("ts")?.unwrap(); + /// + /// assert_eq!(timestamp.time(), 649876543); + /// assert_eq!(timestamp.increment(), 9); + /// assert!(matches!(doc.get_timestamp("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); + /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_timestamp(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Timestamp, RawBson::as_timestamp) + } + + /// Gets a reference to the BSON int32 value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a 32-bit integer. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{RawDocument, ErrorKind}}; + /// + /// let doc = RawDocument::from_document(&doc! { + /// "bool": true, + /// "i32": 1_000_000, + /// })?; + /// + /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); + /// assert!(matches!(doc.get_i32("bool").unwrap_err().kind, ErrorKind::UnexpectedType { ..})); + /// assert_eq!(doc.get_i32("unknown"), Ok(None)); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_i32(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Int32, RawBson::as_i32) + } + + /// Gets a reference to the BSON int64 value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a 64-bit integer. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{ErrorKind, RawDocument}}; + /// + /// let doc = RawDocument::from_document(&doc! 
{ + /// "bool": true, + /// "i64": 9223372036854775807_i64, + /// })?; + /// + /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); + /// assert!(matches!(doc.get_i64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); + /// assert_eq!(doc.get_i64("unknown"), Ok(None)); + /// # Ok::<(), Error>(()) + /// ``` + pub fn get_i64(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Int64, RawBson::as_i64) + } /// Return a reference to the contained data as a `&[u8]` /// @@ -751,8 +741,7 @@ pub struct Iter<'a> { impl<'a> Iter<'a> { fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> { let end = checked_add(start, num_bytes)?; - let range = start..end; - if self.doc.data.get(range).is_none() { + if self.doc.data.get(start..end).is_none() { return Err(Error::new_without_key(ErrorKind::MalformedValue { message: format!( "length exceeds remaining length of buffer: {} vs {}", @@ -775,6 +764,7 @@ impl<'a> Iter<'a> { } fn next_document(&self, starting_at: usize) -> Result<&'a RawDocumentRef> { + self.verify_enough_bytes(starting_at, MIN_BSON_DOCUMENT_SIZE as usize)?; let size = i32_from_slice(&self.doc.data[starting_at..])? as usize; if size < MIN_BSON_DOCUMENT_SIZE as usize { @@ -1006,29 +996,3 @@ impl<'a> Iterator for Iter<'a> { Some(kvp_result) } } - -type ValueAccessResult = std::result::Result; - -/// Error to indicate that either a value was empty or it contained an unexpected -/// type, for use with the direct getters. -#[derive(PartialEq, Clone)] -#[non_exhaustive] -pub enum ValueAccessError { - /// Cannot find the expected field with the specified key - NotPresent, - - /// Found a Bson value with the specified key, but not with the expected type - UnexpectedType { - expected: ElementType, - actual: ElementType, - }, - - /// An error was encountered attempting to decode the document. 
- InvalidBson(super::Error), -} - -impl From for ValueAccessError { - fn from(e: super::Error) -> Self { - ValueAccessError::InvalidBson(e) - } -} diff --git a/src/raw/elem.rs b/src/raw/elem.rs index 7a38a341..bec60baf 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -57,42 +57,36 @@ pub enum RawBson<'a> { DbPointer(RawDbPointer<'a>), } -// #[derive(Clone, Copy, Debug)] -// pub struct RawBson<'a> { -// element_type: ElementType, -// data: &'a [u8], -// } - impl<'a> RawBson<'a> { - // pub(super) fn new(element_type: ElementType, data: &'a [u8]) -> RawBson<'a> { - // RawBson { element_type, data } - // } - - /// Gets the type of the value. - pub fn element_type(self) -> ElementType { - // self.element_type - todo!() + /// Get the [`ElementType`] of this value. + pub fn element_type(&self) -> ElementType { + match *self { + RawBson::Double(..) => ElementType::Double, + RawBson::String(..) => ElementType::String, + RawBson::Array(..) => ElementType::Array, + RawBson::Document(..) => ElementType::EmbeddedDocument, + RawBson::Boolean(..) => ElementType::Boolean, + RawBson::Null => ElementType::Null, + RawBson::RegularExpression(..) => ElementType::RegularExpression, + RawBson::JavaScriptCode(..) => ElementType::JavaScriptCode, + RawBson::JavaScriptCodeWithScope(..) => ElementType::JavaScriptCodeWithScope, + RawBson::Int32(..) => ElementType::Int32, + RawBson::Int64(..) => ElementType::Int64, + RawBson::Timestamp(..) => ElementType::Timestamp, + RawBson::Binary(..) => ElementType::Binary, + RawBson::ObjectId(..) => ElementType::ObjectId, + RawBson::DateTime(..) => ElementType::DateTime, + RawBson::Symbol(..) => ElementType::Symbol, + RawBson::Decimal128(..) => ElementType::Decimal128, + RawBson::Undefined => ElementType::Undefined, + RawBson::MaxKey => ElementType::MaxKey, + RawBson::MinKey => ElementType::MinKey, + RawBson::DbPointer(..) => ElementType::DbPointer, + } } - // /// Gets a reference to the raw bytes of the value. 
- // pub fn as_bytes(self) -> &'a [u8] { - // self.data - // } - - // fn validate_type(self, expected: ElementType) -> Result<()> { - // if self.element_type != expected { - // return Err(Error { - // key: None, - // kind: ErrorKind::UnexpectedType { - // actual: self.element_type, - // expected, - // }, - // }); - // } - // Ok(()) - // } - - /// Gets the f64 that's referenced or returns an error if the value isn't a BSON double. + /// Gets the f64 that's referenced or returns `None` if the referenced value isn't a BSON + /// double. pub fn as_f64(self) -> Option { match self { RawBson::Double(d) => Some(d), @@ -100,7 +94,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `String`, return its value as a `&str`. Returns `None` otherwise + /// Gets the `&str` that's referenced or returns `None` if the referenced value isn't a BSON + /// String. pub fn as_str(self) -> Option<&'a str> { match self { RawBson::String(s) => Some(s), @@ -108,7 +103,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `Array`, return its value. Returns `None` otherwise + /// Gets the [`&RawArray`] that's referenced or returns `None` if the referenced value isn't a + /// BSON array. pub fn as_array(self) -> Option<&'a RawArray> { match self { RawBson::Array(v) => Some(v), @@ -116,7 +112,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `Document`, return its value. Returns `None` otherwise + /// Gets the [`&RawDocumentRef`] that's referenced or returns `None` if the referenced value + /// isn't a BSON document. pub fn as_document(self) -> Option<&'a RawDocumentRef> { match self { RawBson::Document(v) => Some(v), @@ -124,7 +121,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `Bool`, return its value. Returns `None` otherwise + /// Gets the `bool` that's referenced or returns `None` if the referenced value isn't a BSON + /// boolean. 
pub fn as_bool(self) -> Option { match self { RawBson::Boolean(v) => Some(v), @@ -132,7 +130,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `I32`, return its value. Returns `None` otherwise + /// Gets the `i32` that's referenced or returns `None` if the referenced value isn't a BSON + /// Int32. pub fn as_i32(self) -> Option { match self { RawBson::Int32(v) => Some(v), @@ -140,7 +139,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `I64`, return its value. Returns `None` otherwise + /// Gets the `i64` that's referenced or returns `None` if the referenced value isn't a BSON + /// Int64. pub fn as_i64(self) -> Option { match self { RawBson::Int64(v) => Some(v), @@ -148,7 +148,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `Objectid`, return its value. Returns `None` otherwise + /// Gets the [`ObjectId`] that's referenced or returns `None` if the referenced value isn't a + /// BSON ObjectID. pub fn as_object_id(self) -> Option { match self { RawBson::ObjectId(v) => Some(v), @@ -156,7 +157,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `Binary`, return its value. Returns `None` otherwise + /// Gets the [`RawBinary`] that's referenced or returns `None` if the referenced value isn't a + /// BSON binary. pub fn as_binary(self) -> Option> { match self { RawBson::Binary(v) => Some(v), @@ -164,7 +166,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `Regex`, return its value. Returns `None` otherwise + /// Gets the [`RawRegex`] that's referenced or returns `None` if the referenced value isn't a + /// BSON regular expression. pub fn as_regex(self) -> Option> { match self { RawBson::RegularExpression(v) => Some(v), @@ -172,7 +175,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `DateTime`, return its value. Returns `None` otherwise + /// Gets the [`DateTime`] that's referenced or returns `None` if the referenced value isn't a + /// BSON datetime. 
pub fn as_datetime(self) -> Option { match self { RawBson::DateTime(v) => Some(v), @@ -180,7 +184,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `Symbol`, return its value. Returns `None` otherwise + /// Gets the symbol that's referenced or returns `None` if the referenced value isn't a BSON + /// symbol. pub fn as_symbol(self) -> Option<&'a str> { match self { RawBson::Symbol(v) => Some(v), @@ -188,7 +193,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `Timestamp`, return its value. Returns `None` otherwise + /// Gets the [`Timestamp`] that's referenced or returns `None` if the referenced value isn't a + /// BSON timestamp. pub fn as_timestamp(self) -> Option { match self { RawBson::Timestamp(timestamp) => Some(timestamp), @@ -196,7 +202,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `Null`, return its value. Returns `None` otherwise + /// Gets the null value that's referenced or returns `None` if the referenced value isn't a BSON + /// null. pub fn as_null(self) -> Option<()> { match self { RawBson::Null => Some(()), @@ -204,15 +211,17 @@ impl<'a> RawBson<'a> { } } - pub fn as_db_pointer(self) -> Option { - // match self { - // Bson::DbPointer(db_pointer) => Some(db_pointer), - // _ => None, - // } - todo!() + /// Gets the [`RawDbPointer`] that's referenced or returns `None` if the referenced value isn't + /// a BSON DB pointer. + pub fn as_db_pointer(self) -> Option> { + match self { + RawBson::DbPointer(d) => Some(d), + _ => None, + } } - /// If `Bson` is `JavaScriptCode`, return its value. Returns `None` otherwise + /// Gets the code that's referenced or returns `None` if the referenced value isn't a BSON + /// JavaScript. pub fn as_javascript(self) -> Option<&'a str> { match self { RawBson::JavaScriptCode(s) => Some(s), @@ -220,7 +229,8 @@ impl<'a> RawBson<'a> { } } - /// If `Bson` is `JavaScriptCodeWithScope`, return its value. 
Returns `None` otherwise + /// Gets the [`RawJavaScriptCodeWithScope`] that's referenced or returns `None` if the + /// referenced value isn't a BSON JavaScript with scope. pub fn as_javascript_with_scope(self) -> Option> { match self { RawBson::JavaScriptCodeWithScope(s) => Some(s), @@ -229,169 +239,6 @@ impl<'a> RawBson<'a> { } } -// impl<'a> RawBson<'a> { - -// /// Gets the string that's referenced or returns an error if the value isn't a BSON string. -// pub fn as_str(self) -> Result<&'a str> { -// self.validate_type(ElementType::String)?; -// read_lenencoded(self.data) -// } - -// /// Gets the document that's referenced or returns an error if the value isn't a BSON -// document. pub fn as_document(self) -> Result<&'a RawDocumentRef> { -// self.validate_type(ElementType::EmbeddedDocument)?; -// RawDocumentRef::new(self.data) -// } - -// /// Gets the array that's referenced or returns an error if the value isn't a BSON array. -// pub fn as_array(self) -> Result<&'a RawArray> { -// self.validate_type(ElementType::Array)?; -// RawArray::new(self.data) -// } - -// /// Gets the BSON binary value that's referenced or returns an error if the value a BSON -// binary. 
pub fn as_binary(self) -> Result> { -// self.validate_type(ElementType::Binary)?; - -// let length = i32_from_slice(&self.data[0..4])?; -// let subtype = BinarySubtype::from(self.data[4]); -// if self.data.len() as i32 != length + 5 { -// return Err(Error { -// key: None, -// kind: ErrorKind::MalformedValue { -// message: "binary bson has wrong declared length".into(), -// }, -// }); -// } -// let data = match subtype { -// BinarySubtype::BinaryOld => { -// if length < 4 { -// return Err(Error::new_without_key(ErrorKind::MalformedValue { -// message: "old binary subtype has no inner declared length".into(), -// })); -// } -// let oldlength = i32_from_slice(&self.data[5..9])?; -// if oldlength + 4 != length { -// return Err(Error::new_without_key(ErrorKind::MalformedValue { -// message: "old binary subtype has wrong inner declared length".into(), -// })); -// } -// &self.data[9..] -// } -// _ => &self.data[5..], -// }; -// Ok(RawBinary::new(subtype, data)) -// } - -// /// Gets the ObjectId that's referenced or returns an error if the value isn't a BSON -// ObjectId. pub fn as_object_id(self) -> Result { -// self.validate_type(ElementType::ObjectId)?; -// Ok(ObjectId::from_bytes(self.data.try_into().map_err( -// |_| { -// Error::new_without_key(ErrorKind::MalformedValue { -// message: "object id should be 12 bytes long".into(), -// }) -// }, -// )?)) -// } - -// /// Gets the boolean that's referenced or returns an error if the value isn't a BSON boolean. -// pub fn as_bool(self) -> Result { -// self.validate_type(ElementType::Boolean)?; -// if self.data.len() != 1 { -// Err(Error::new_without_key(ErrorKind::MalformedValue { -// message: "boolean has length != 1".into(), -// })) -// } else { -// read_bool(self.data).map_err(|e| { -// Error::new_without_key(ErrorKind::MalformedValue { -// message: e.to_string(), -// }) -// }) -// } -// } - -// /// Gets the DateTime that's referenced or returns an error if the value isn't a BSON -// DateTime. 
pub fn as_datetime(self) -> Result { -// self.validate_type(ElementType::DateTime)?; -// let millis = i64_from_slice(self.data)?; -// Ok(DateTime::from_millis(millis)) -// } - -// /// Gets the regex that's referenced or returns an error if the value isn't a BSON regex. -// pub fn as_regex(self) -> Result> { -// self.validate_type(ElementType::RegularExpression)?; -// RawRegex::new(self.data) -// } - -// /// Gets the BSON JavaScript code that's referenced or returns an error if the value isn't -// BSON /// JavaScript code. -// pub fn as_javascript(self) -> Result<&'a str> { -// self.validate_type(ElementType::JavaScriptCode)?; -// read_lenencoded(self.data) -// } - -// /// Gets the symbol that's referenced or returns an error if the value isn't a BSON symbol. -// pub fn as_symbol(self) -> Result<&'a str> { -// self.validate_type(ElementType::Symbol)?; -// read_lenencoded(self.data) -// } - -// /// Gets the BSON JavaScript code with scope that's referenced or returns an error if the -// value /// isn't BSON JavaScript code with scope. -// pub fn as_javascript_with_scope(self) -> Result> { -// self.validate_type(ElementType::JavaScriptCodeWithScope)?; -// let length = i32_from_slice(&self.data[..4])?; - -// if (self.data.len() as i32) != length { -// return Err(Error::new_without_key(ErrorKind::MalformedValue { -// message: format!("TODO: Java"), -// })); -// } - -// let code = read_lenencoded(&self.data[4..])?; -// let scope = RawDocumentRef::new(&self.data[9 + code.len()..])?; - -// Ok(RawJavaScriptCodeWithScope { code, scope }) -// } - -// /// Gets the timestamp that's referenced or returns an error if the value isn't a BSON -// /// timestamp. -// pub fn as_timestamp(self) -> Result> { -// self.validate_type(ElementType::Timestamp)?; -// assert_eq!(self.data.len(), 8); -// Ok(RawTimestamp { data: self.data }) -// } - -// /// Gets the i32 that's referenced or returns an error if the value isn't a BSON int32. 
-// pub fn as_i32(self) -> Result { -// self.validate_type(ElementType::Int32)?; -// i32_from_slice(self.data) -// } - -// /// Gets the i64 that's referenced or returns an error if the value isn't a BSON int64. -// pub fn as_i64(self) -> Result { -// self.validate_type(ElementType::Int64)?; -// i64_from_slice(self.data) -// } - -// /// Gets the decimal that's referenced or returns an error if the value isn't a BSON -// Decimal128. pub fn as_decimal128(self) -> Result { -// self.validate_type(ElementType::Decimal128)?; -// let bytes: [u8; 128 / 8] = self.data.try_into().map_err(|_| { -// Error::new_without_key(ErrorKind::MalformedValue { -// message: format!("decimal128 value has invalid length: {}", self.data.len()), -// }) -// })?; -// Ok(Decimal128::from_bytes(bytes)) -// } - -// /// Gets the null value that's referenced or returns an error if the value isn't a BSON null. -// pub fn as_null(self) -> Result<()> { -// self.validate_type(ElementType::Null) -// } -// } - impl<'a> TryFrom> for Bson { type Error = Error; @@ -506,6 +353,7 @@ impl<'a> RawJavaScriptCodeWithScope<'a> { } } +/// A BSON DB pointer value referencing raw bytes stored elesewhere. #[derive(Debug, Clone, Copy, PartialEq)] pub struct RawDbPointer<'a> { pub(crate) namespace: &'a str, diff --git a/src/raw/error.rs b/src/raw/error.rs index 9be2d279..1fc020d5 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -83,3 +83,53 @@ impl std::fmt::Display for Error { impl std::error::Error for Error {} pub type Result = std::result::Result; +pub type ValueAccessResult = std::result::Result; + +/// Error to indicate that either a value was empty or it contained an unexpected +/// type, for use with the direct getters (e.g. [`RawDocumentRef::get_str`]). +#[derive(Debug, PartialEq, Clone)] +#[non_exhaustive] +pub struct ValueAccessError { + /// The type of error that was encountered. + pub kind: ValueAccessErrorKind, + + /// The key at which the error was encountered. 
+ pub key: String, +} + +#[derive(Debug, PartialEq, Clone)] +#[non_exhaustive] +pub enum ValueAccessErrorKind { + /// Cannot find the expected field with the specified key + NotPresent, + + /// Found a Bson value with the specified key, but not with the expected type + #[non_exhaustive] + UnexpectedType { + expected: ElementType, + actual: ElementType, + }, + + /// An error was encountered attempting to decode the document. + InvalidBson(super::Error), +} + +impl std::fmt::Display for ValueAccessError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let prefix = format!("error at key: \"{}\": ", self.key); + + match &self.kind { + ValueAccessErrorKind::UnexpectedType { actual, expected } => write!( + f, + "{} unexpected element type: {:?}, expected: {:?}", + prefix, actual, expected + ), + ValueAccessErrorKind::InvalidBson(error) => { + write!(f, "{}: {}", prefix, error) + } + ValueAccessErrorKind::NotPresent => write!(f, "{}value not present", prefix), + } + } +} + +impl std::error::Error for ValueAccessError {} From e7ff8fba7bafe7d83fc637c8dab133015ba71402 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 16:38:52 -0400 Subject: [PATCH 33/48] rename RawDocumentRef to RawDoc, update documentation --- src/raw/array.rs | 70 ++++++++++++++++++---- src/raw/doc.rs | 121 +++++++++++++++++++-------------------- src/raw/elem.rs | 14 ++--- src/raw/error.rs | 2 +- src/raw/mod.rs | 41 ++++++++----- src/raw/test/mod.rs | 10 ++-- src/tests/spec/corpus.rs | 8 +-- 7 files changed, 163 insertions(+), 103 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index 96f75ff4..c06e5c0c 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -6,30 +6,80 @@ use super::{ Iter, RawBinary, RawBson, - RawDocumentRef, + RawDoc, RawRegex, Result, }; use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; -/// A BSON array referencing raw bytes stored elsewhere. 
+/// A slice of a BSON document containing a BSON array value (akin to [`std::str`]). This can be retrieved from a +/// [`RawDoc`] via [`RawDoc::get`]. +/// +/// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`. +/// +/// Accessing elements within a [`RawArray`] is similar to element access in [`bson::Document`], +/// but because the contents are parsed during iteration instead of at creation time, format errors +/// can happen at any time during use. +/// +/// Iterating over a [`RawArray`] yields either an error or a key-value pair that borrows from the +/// original document without making any additional allocations. +/// +/// ``` +/// # use bson::raw::{Error}; +/// use bson::{doc, raw::RawDoc}; +/// +/// let doc = doc! { +/// "x": [1, true, "two", 5.5] +/// }; +/// let bytes = bson::to_vec(&doc).unwrap(); +/// +/// let rawdoc = RawDoc::new(bytes.as_slice())?; +/// let rawarray = rawdoc.get_array("x")?; +/// +/// for v in rawarray { +/// println!("{:?}", v?); +/// } +/// # Ok::<(), Error>(()) +/// ``` +/// +/// Individual elements can be accessed using [`RawArray::get`] or any of +/// the type-specific getters, such as [`RawArray::get_object_id`] or +/// [`RawArray::get_str`]. Note that accessing elements is an O(N) operation, as it +/// requires iterating through the array from the beginning to find the requested index. +/// +/// ``` +/// # use bson::raw::{ValueAccessError}; +/// use bson::{doc, raw::RawDoc}; +/// +/// let doc = doc! 
{ +/// "x": [1, true, "two", 5.5] +/// }; +/// let bytes = bson::to_vec(&doc).unwrap(); +/// +/// let rawdoc = RawDoc::new(bytes.as_slice())?; +/// let rawarray = rawdoc.get_array("x")?; +/// +/// assert_eq!(rawarray.get_bool(1)?, true); +/// # Ok::<(), ValueAccessError>(()) +/// ``` +#[derive(Debug)] #[repr(transparent)] pub struct RawArray { - pub(crate) doc: RawDocumentRef, + pub(crate) doc: RawDoc, } impl RawArray { - pub(crate) fn from_doc(doc: &RawDocumentRef) -> &RawArray { + pub(crate) fn from_doc(doc: &RawDoc) -> &RawArray { // SAFETY: // // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is // null, dangling, or misaligned. We know the pointer is not null or dangling due to the - // fact that it's created by a safe reference. Converting &RawDocumentRef to *const - // RawDocumentRef will be properly aligned due to them being references to the same type, - // and converting *const RawDocumentRef to *const RawArray is aligned due to the fact that - // the only field in a RawArray is a RawDocumentRef, meaning the structs are represented + // fact that it's created by a safe reference. Converting &RawDoc to *const + // RawDoc will be properly aligned due to them being references to the same type, + // and converting *const RawDoc to *const RawArray is aligned due to the fact that + // the only field in a RawArray is a RawDoc, meaning the structs are represented // identically at the byte level. - unsafe { &*(doc as *const RawDocumentRef as *const RawArray) } + unsafe { &*(doc as *const RawDoc as *const RawArray) } } /// Gets a reference to the value at the given index. @@ -79,7 +129,7 @@ impl RawArray { /// Gets a reference to the document at the given index or returns an error if the /// value at that index isn't a document. 
- pub fn get_document(&self, index: usize) -> ValueAccessResult<&RawDocumentRef> { + pub fn get_document(&self, index: usize) -> ValueAccessResult<&RawDoc> { self.get_with(index, ElementType::EmbeddedDocument, RawBson::as_document) } diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 4c41d102..50f9f9d6 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -37,12 +37,12 @@ use crate::{oid::ObjectId, spec::ElementType, Document}; /// A BSON document, stored as raw bytes on the heap. This can be created from a `Vec` or /// a [`bson::Document`]. /// -/// Accessing elements within a `RawDocument` is similar to element access in [bson::Document], but -/// because the contents are parsed during iteration, instead of at creation time, format errors can -/// happen at any time during use. +/// Accessing elements within a [`RawDocument`] is similar to element access in [`bson::Document`], +/// but because the contents are parsed during iteration instead of at creation time, format errors +/// can happen at any time during use. /// -/// Iterating over a RawDocument yields either an error or a key-value pair that borrows from the -/// original document without making any additional allocations. +/// Iterating over a [`RawDocument`] yields either an error or a key-value pair that borrows from +/// the original document without making any additional allocations. /// /// ``` /// # use bson::raw::{RawDocument, Error}; @@ -55,11 +55,11 @@ use crate::{oid::ObjectId, spec::ElementType, Document}; /// # Ok::<(), Error>(()) /// ``` /// -/// Individual elements can be accessed using [`RawDocument::get`](RawDocument::get) or any of the -/// type-specific getters, such as [`RawDocument::get_object_id`](RawDocument::get_object_id) or -/// [`RawDocument::get_str`](RawDocument::get_str). Note that accessing elements is an O(N) -/// operation, as it requires iterating through the document from the beginning to find the -/// requested key. 
+/// This type implements `Deref` to `RawDoc`, meaning that all methods on `RawDoc` slices are +/// available on `RawDocument` values as well. This includes [`RawDoc::get`] or any of the +/// type-specific getters, such as [`RawDoc::get_object_id`] or [`RawDoc::get_str`]. Note that +/// accessing elements is an O(N) operation, as it requires iterating through the document from the +/// beginning to find the requested key. /// /// ``` /// # use bson::raw::{RawDocument, Error}; @@ -91,7 +91,7 @@ impl RawDocument { /// # Ok::<(), Error>(()) /// ``` pub fn new(data: Vec) -> Result { - let _ = RawDocumentRef::new(data.as_slice())?; + let _ = RawDoc::new(data.as_slice())?; Ok(Self { data }) } @@ -162,13 +162,13 @@ impl RawDocument { } } -impl<'a> From for Cow<'a, RawDocumentRef> { +impl<'a> From for Cow<'a, RawDoc> { fn from(rd: RawDocument) -> Self { Cow::Owned(rd) } } -impl<'a> From<&'a RawDocument> for Cow<'a, RawDocumentRef> { +impl<'a> From<&'a RawDocument> for Cow<'a, RawDoc> { fn from(rd: &'a RawDocument) -> Self { Cow::Borrowed(rd.as_ref()) } @@ -195,19 +195,19 @@ impl<'a> IntoIterator for &'a RawDocument { } } -impl AsRef for RawDocument { - fn as_ref(&self) -> &RawDocumentRef { - RawDocumentRef::new_unchecked(&self.data) +impl AsRef for RawDocument { + fn as_ref(&self) -> &RawDoc { + RawDoc::new_unchecked(&self.data) } } -impl Borrow for RawDocument { - fn borrow(&self) -> &RawDocumentRef { +impl Borrow for RawDocument { + fn borrow(&self) -> &RawDoc { &*self } } -impl ToOwned for RawDocumentRef { +impl ToOwned for RawDoc { type Owned = RawDocument; fn to_owned(&self) -> Self::Owned { @@ -215,22 +215,25 @@ impl ToOwned for RawDocumentRef { } } -/// A BSON document referencing raw bytes stored elsewhere. This can be created from a -/// [RawDocument] or any type that contains valid BSON data, and can be referenced as a `[u8]`, -/// including static binary literals, [Vec](std::vec::Vec), or arrays. +/// A slice of a BSON document (akin to [`std::str`]). 
This can be created from a +/// [`RawDocument`] or any type that contains valid BSON data, including static binary literals, +/// [Vec](std::vec::Vec), or arrays. +/// +/// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`. For an +/// owned version of this type, see [`RawDocument`]. /// -/// Accessing elements within a `RawDocumentRef` is similar to element access in [bson::Document], -/// but because the contents are parsed during iteration, instead of at creation time, format errors +/// Accessing elements within a [`RawDoc`] is similar to element access in [`bson::Document`], +/// but because the contents are parsed during iteration instead of at creation time, format errors /// can happen at any time during use. /// -/// Iterating over a RawDocumentRef yields either an error or a key-value pair that borrows from the +/// Iterating over a [`RawDoc`] yields either an error or a key-value pair that borrows from the /// original document without making any additional allocations. /// ``` /// # use bson::raw::{Error}; -/// use bson::raw::RawDocumentRef; +/// use bson::raw::RawDoc; /// -/// let doc = RawDocumentRef::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; +/// let doc = RawDoc::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; /// let mut iter = doc.into_iter(); /// let (key, value) = iter.next().unwrap()?; /// assert_eq!(key, "hi"); @@ -239,10 +242,9 @@ impl ToOwned for RawDocumentRef { /// # Ok::<(), Error>(()) /// ``` /// -/// Individual elements can be accessed using [`RawDocumentRef::get`](RawDocumentRef::get) or any of -/// the type-specific getters, such as -/// [`RawDocumentRef::get_object_id`](RawDocumentRef::get_object_id) or [`RawDocumentRef:: -/// get_str`](RawDocumentRef::get_str). 
Note that accessing elements is an O(N) operation, as it +/// Individual elements can be accessed using [`RawDoc::get`] or any of +/// the type-specific getters, such as [`RawDoc::get_object_id`] or +/// [`RawDoc::get_str`]. Note that accessing elements is an O(N) operation, as it /// requires iterating through the document from the beginning to find the requested key. /// /// ``` @@ -253,12 +255,12 @@ impl ToOwned for RawDocumentRef { /// ``` #[derive(Debug)] #[repr(transparent)] -pub struct RawDocumentRef { +pub struct RawDoc { data: [u8], } -impl RawDocumentRef { - /// Constructs a new RawDocumentRef, validating _only_ the +impl RawDoc { + /// Constructs a new RawDoc, validating _only_ the /// following invariants: /// * `data` is at least five bytes long (the minimum for a valid BSON document) /// * the initial four bytes of `data` accurately represent the length of the bytes as @@ -271,12 +273,12 @@ impl RawDocumentRef { /// the RawDocument will return Errors where appropriate. /// /// ``` - /// use bson::raw::RawDocumentRef; + /// use bson::raw::RawDoc; /// - /// let doc = RawDocumentRef::new(b"\x05\0\0\0\0")?; + /// let doc = RawDoc::new(b"\x05\0\0\0\0")?; /// # Ok::<(), bson::raw::Error>(()) /// ``` - pub fn new + ?Sized>(data: &D) -> Result<&RawDocumentRef> { + pub fn new + ?Sized>(data: &D) -> Result<&RawDoc> { let data = data.as_ref(); if data.len() < 5 { @@ -308,30 +310,30 @@ impl RawDocumentRef { }); } - Ok(RawDocumentRef::new_unchecked(data)) + Ok(RawDoc::new_unchecked(data)) } /// Creates a new Doc referencing the provided data slice. - fn new_unchecked + ?Sized>(data: &D) -> &RawDocumentRef { + fn new_unchecked + ?Sized>(data: &D) -> &RawDoc { // SAFETY: // // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is // null, dangling, or misaligned. We know the pointer is not null or dangling due to the // fact that it's created by a safe reference. 
Converting &[u8] to *const [u8] will be // properly aligned due to them being references to the same type, and converting *const - // [u8] to *const RawDocumentRef is aligned due to the fact that the only field in a - // RawDocumentRef is a [u8], meaning the structs are represented identically at the byte + // [u8] to *const RawDoc is aligned due to the fact that the only field in a + // RawDoc is a [u8], meaning the structs are represented identically at the byte // level. - unsafe { &*(data.as_ref() as *const [u8] as *const RawDocumentRef) } + unsafe { &*(data.as_ref() as *const [u8] as *const RawDoc) } } /// Creates a new RawDocument with an owned copy of the BSON bytes. /// /// ``` - /// use bson::raw::{RawDocumentRef, RawDocument, Error}; + /// use bson::raw::{RawDoc, RawDocument, Error}; /// /// let data = b"\x05\0\0\0\0"; - /// let doc_ref = RawDocumentRef::new(data)?; + /// let doc_ref = RawDoc::new(data)?; /// let doc: RawDocument = doc_ref.to_raw_document(); /// # Ok::<(), Error>(()) pub fn to_raw_document(&self) -> RawDocument { @@ -457,10 +459,7 @@ impl RawDocumentRef { /// assert!(doc.get_document("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get_document<'a>( - &'a self, - key: impl AsRef, - ) -> ValueAccessResult<&'a RawDocumentRef> { + pub fn get_document<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a RawDoc> { self.get_with(key, ElementType::EmbeddedDocument, RawBson::as_document) } @@ -684,30 +683,30 @@ impl RawDocumentRef { } } -impl AsRef for RawDocumentRef { - fn as_ref(&self) -> &RawDocumentRef { +impl AsRef for RawDoc { + fn as_ref(&self) -> &RawDoc { self } } impl Deref for RawDocument { - type Target = RawDocumentRef; + type Target = RawDoc; fn deref(&self) -> &Self::Target { - RawDocumentRef::new_unchecked(&self.data) + RawDoc::new_unchecked(&self.data) } } -impl<'a> From<&'a RawDocumentRef> for Cow<'a, RawDocumentRef> { - fn from(rdr: &'a RawDocumentRef) -> Self { +impl<'a> From<&'a RawDoc> for Cow<'a, 
RawDoc> { + fn from(rdr: &'a RawDoc) -> Self { Cow::Borrowed(rdr) } } -impl TryFrom<&RawDocumentRef> for crate::Document { +impl TryFrom<&RawDoc> for crate::Document { type Error = Error; - fn try_from(rawdoc: &RawDocumentRef) -> Result { + fn try_from(rawdoc: &RawDoc) -> Result { rawdoc .into_iter() .map(|res| res.and_then(|(k, v)| Ok((k.to_owned(), v.try_into()?)))) @@ -715,7 +714,7 @@ impl TryFrom<&RawDocumentRef> for crate::Document { } } -impl<'a> IntoIterator for &'a RawDocumentRef { +impl<'a> IntoIterator for &'a RawDoc { type IntoIter = Iter<'a>; type Item = Result<(&'a str, RawBson<'a>)>; @@ -730,7 +729,7 @@ impl<'a> IntoIterator for &'a RawDocumentRef { /// An iterator over the document's entries. pub struct Iter<'a> { - doc: &'a RawDocumentRef, + doc: &'a RawDoc, offset: usize, /// Whether the underlying doc is assumed to be valid or if an error has been encountered. @@ -763,7 +762,7 @@ impl<'a> Iter<'a> { Ok(oid) } - fn next_document(&self, starting_at: usize) -> Result<&'a RawDocumentRef> { + fn next_document(&self, starting_at: usize) -> Result<&'a RawDoc> { self.verify_enough_bytes(starting_at, MIN_BSON_DOCUMENT_SIZE as usize)?; let size = i32_from_slice(&self.doc.data[starting_at..])? 
as usize; @@ -784,7 +783,7 @@ impl<'a> Iter<'a> { }, }); } - RawDocumentRef::new(&self.doc.data[starting_at..end]) + RawDoc::new(&self.doc.data[starting_at..end]) } } @@ -947,7 +946,7 @@ impl<'a> Iterator for Iter<'a> { let slice = &self.doc.data[valueoffset..(valueoffset + length)]; let code = read_lenencoded(&slice[4..])?; let scope_start = 4 + 4 + code.len() + 1; - let scope = RawDocumentRef::new(&slice[scope_start..])?; + let scope = RawDoc::new(&slice[scope_start..])?; ( RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope { code, diff --git a/src/raw/elem.rs b/src/raw/elem.rs index bec60baf..cec88af9 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -1,6 +1,6 @@ use std::convert::{TryFrom, TryInto}; -use super::{Error, RawArray, RawDocumentRef, Result}; +use super::{Error, RawArray, RawDoc, Result}; use crate::{ oid::{self, ObjectId}, spec::{BinarySubtype, ElementType}, @@ -11,7 +11,7 @@ use crate::{ }; /// A BSON value referencing raw bytes stored elsewhere. -#[derive(Clone, Copy)] +#[derive(Debug, Clone, Copy)] pub enum RawBson<'a> { /// 64-bit binary floating point Double(f64), @@ -20,7 +20,7 @@ pub enum RawBson<'a> { /// Array Array(&'a RawArray), /// Embedded document - Document(&'a RawDocumentRef), + Document(&'a RawDoc), /// Boolean value Boolean(bool), /// Null value @@ -112,9 +112,9 @@ impl<'a> RawBson<'a> { } } - /// Gets the [`&RawDocumentRef`] that's referenced or returns `None` if the referenced value + /// Gets the [`&RawDoc`] that's referenced or returns `None` if the referenced value /// isn't a BSON document. 
- pub fn as_document(self) -> Option<&'a RawDocumentRef> { + pub fn as_document(self) -> Option<&'a RawDoc> { match self { RawBson::Document(v) => Some(v), _ => None, @@ -338,7 +338,7 @@ impl<'a> RawRegex<'a> { #[derive(Clone, Copy, Debug)] pub struct RawJavaScriptCodeWithScope<'a> { pub(crate) code: &'a str, - pub(crate) scope: &'a RawDocumentRef, + pub(crate) scope: &'a RawDoc, } impl<'a> RawJavaScriptCodeWithScope<'a> { @@ -348,7 +348,7 @@ impl<'a> RawJavaScriptCodeWithScope<'a> { } /// Gets the scope in the value. - pub fn scope(self) -> &'a RawDocumentRef { + pub fn scope(self) -> &'a RawDoc { self.scope } } diff --git a/src/raw/error.rs b/src/raw/error.rs index 1fc020d5..04b6a6da 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -86,7 +86,7 @@ pub type Result = std::result::Result; pub type ValueAccessResult = std::result::Result; /// Error to indicate that either a value was empty or it contained an unexpected -/// type, for use with the direct getters (e.g. [`RawDocumentRef::get_str`]). +/// type, for use with the direct getters (e.g. [`RawDoc::get_str`]). #[derive(Debug, PartialEq, Clone)] #[non_exhaustive] pub struct ValueAccessError { diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 78a1b5cc..c21ad617 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1,7 +1,17 @@ -//! A RawDocument can be created from a `Vec` containing raw BSON data, and elements -//! accessed via methods similar to those available on the Document type. Note that rawbson returns -//! a raw::Result>, since the bytes contained in the document are not fully validated -//! until trying to access the contained data. +//! An API for interacting with raw BSON bytes. +//! +//! This module provides two document types, [`RawDocument`] and [`RawDoc`] (akin to [`std::String`] +//! and [`std::str`], for working with raw BSON documents. These types differ from the regular +//! [`Document`] type in that their storage is BSON bytes rather than a hash-map like Rust type. In +//! 
certain circumstances, these types can be leveraged for increased performance. +//! +//! This module also provides a [`RawBson`] type for modeling any borrowed BSON element and a [`RawArray`] type +//! for modeling a borrowed slice of a document containing a BSON array element. +//! +//! A [`RawDocument`] can be created from a `Vec` containing raw BSON data, and elements +//! accessed via methods similar to those available on the [`Document`] type. Note that +//! [`RawDocument::get`] returns a [`raw::Result>`], since the bytes contained in +//! the document are not fully validated until trying to access the contained data. //! //! ```rust //! use bson::raw::{ @@ -20,10 +30,11 @@ //! # Ok::<(), bson::raw::Error>(()) //! ``` //! -//! ### bson-rust interop +//! ### [`Document`] interop //! -//! A [`RawDocument`] can be created from a [`bson::document::Document`]. Internally, this -//! serializes the `Document` to a `Vec`, and then includes those bytes in the [`RawDocument`]. +//! A [`RawDocument`] can be created from a [`Document`]. Internally, this +//! serializes the [`Document`] to a `Vec`, and then includes those bytes in the +//! [`RawDocument`]. //! //! ```rust //! use bson::{ @@ -36,7 +47,7 @@ //! "cruel": "world" //! } //! }; - +//! //! let raw = RawDocument::from_document(&document)?; //! let value: Option<&str> = raw //! .get_document("goodbye")? @@ -51,26 +62,26 @@ //! # Ok::<(), bson::raw::Error>(()) //! ``` //! -//! ### Reference types +//! ### Reference types ([`RawDoc`]) //! -//! A BSON document can also be accessed with the [`RawDocumentRef`] reference type, which is an +//! A BSON document can also be accessed with the [`RawDoc`] type, which is an //! unsized type that represents the BSON payload as a `[u8]`. This allows accessing nested -//! documents without reallocation. [RawDocumentRef] must always be accessed via a pointer type, +//! documents without reallocation. [`RawDoc`] must always be accessed via a pointer type, //! 
similarly to `[T]` and `str`. //! //! The below example constructs a bson document in a stack-based array, //! and extracts a &str from it, performing no heap allocation. //! ```rust -//! use bson::raw::RawDocumentRef; +//! use bson::raw::RawDoc; //! //! let bytes = b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00"; -//! assert_eq!(RawDocumentRef::new(bytes)?.get_str("hi")?, Some("y'all")); +//! assert_eq!(RawDoc::new(bytes)?.get_str("hi")?, Some("y'all")); //! # Ok::<(), bson::raw::Error>(()) //! ``` //! //! ### Iteration //! -//! [`RawDocumentRef`] implements [`IntoIterator`](std::iter::IntoIterator), which can also be +//! [`RawDoc`] implements [`IntoIterator`](std::iter::IntoIterator), which can also be //! accessed via [`RawDocument::iter`]. //! ```rust @@ -113,7 +124,7 @@ use crate::de::MIN_BSON_STRING_SIZE; pub use self::{ array::{RawArray, RawArrayIter}, - doc::{Iter, RawDocument, RawDocumentRef}, + doc::{Iter, RawDocument, RawDoc}, elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex}, error::{Error, ErrorKind, Result}, }; diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index b5e7eb4c..caecb116 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -27,7 +27,7 @@ fn string_from_document() { "that": "second", "something": "else", }); - let rawdoc = RawDocumentRef::new(&docbytes).unwrap(); + let rawdoc = RawDoc::new(&docbytes).unwrap(); assert_eq!( rawdoc.get("that").unwrap().unwrap().as_str().unwrap(), "second", @@ -42,7 +42,7 @@ fn nested_document() { "double": 5.5, }, }); - let rawdoc = RawDocumentRef::new(&docbytes).unwrap(); + let rawdoc = RawDoc::new(&docbytes).unwrap(); let subdoc = rawdoc .get("outer") .expect("get doc result") @@ -77,7 +77,7 @@ fn iterate() { "peanut butter": "chocolate", "easy as": {"do": 1, "re": 2, "mi": 3}, }); - let rawdoc = RawDocumentRef::new(&docbytes).expect("malformed bson document"); + let rawdoc = RawDoc::new(&docbytes).expect("malformed bson document"); let mut dociter = 
rawdoc.into_iter(); let next = dociter.next().expect("no result").expect("invalid bson"); assert_eq!(next.0, "apples"); @@ -114,7 +114,7 @@ fn rawdoc_to_doc() { "end": "END", }); - let rawdoc = RawDocumentRef::new(&docbytes).expect("invalid document"); + let rawdoc = RawDoc::new(&docbytes).expect("invalid document"); let _doc: crate::Document = rawdoc.try_into().expect("invalid bson"); } @@ -428,7 +428,7 @@ fn into_bson_conversion() { "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, "boolean": false, }); - let rawbson = RawBson::Document(RawDocumentRef::new(docbytes.as_slice()).unwrap()); + let rawbson = RawBson::Document(RawDoc::new(docbytes.as_slice()).unwrap()); let b: Bson = rawbson.try_into().expect("invalid bson"); let doc = b.as_document().expect("not a document"); assert_eq!(*doc.get("f64").expect("f64 not found"), Bson::Double(2.5)); diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index c1675bfc..d2e2158f 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -3,7 +3,7 @@ use std::{ str::FromStr, }; -use crate::{raw::RawDocumentRef, tests::LOCK, Bson, Document}; +use crate::{raw::RawDoc, tests::LOCK, Bson, Document}; use pretty_assertions::assert_eq; use serde::Deserialize; @@ -79,7 +79,7 @@ fn run_test(test: TestFile) { let todocument_documentfromreader_cb: Document = crate::to_document(&documentfromreader_cb).expect(&description); - let document_from_raw_document: Document = RawDocumentRef::new(canonical_bson.as_slice()) + let document_from_raw_document: Document = RawDoc::new(canonical_bson.as_slice()) .expect(&description) .try_into() .expect(&description); @@ -212,7 +212,7 @@ fn run_test(test: TestFile) { description, ); - let document_from_raw_document: Document = RawDocumentRef::new(db.as_slice()) + let document_from_raw_document: Document = RawDoc::new(db.as_slice()) .expect(&description) .try_into() .expect(&description); @@ -411,7 +411,7 @@ fn run_test(test: TestFile) { 
Document::from_reader(bson.as_slice()).expect_err(&description); crate::from_reader::<_, Document>(bson.as_slice()).expect_err(description.as_str()); - if let Ok(doc) = RawDocumentRef::new(bson.as_slice()) { + if let Ok(doc) = RawDoc::new(bson.as_slice()) { Document::try_from(doc).expect_err(&description.as_str()); } From 69fc4270a7b321f6f38466d02e35687c9a196965 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 16:44:20 -0400 Subject: [PATCH 34/48] rename RawArray to RawArr --- src/raw/array.rs | 46 +++++++++++++++++++++++----------------------- src/raw/doc.rs | 9 +++------ src/raw/elem.rs | 8 ++++---- src/raw/mod.rs | 16 ++++++++-------- 4 files changed, 38 insertions(+), 41 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index c06e5c0c..387747d6 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -12,16 +12,16 @@ use super::{ }; use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; -/// A slice of a BSON document containing a BSON array value (akin to [`std::str`]). This can be retrieved from a -/// [`RawDoc`] via [`RawDoc::get`]. +/// A slice of a BSON document containing a BSON array value (akin to [`std::str`]). This can be +/// retrieved from a [`RawDoc`] via [`RawDoc::get`]. /// /// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`. /// -/// Accessing elements within a [`RawArray`] is similar to element access in [`bson::Document`], +/// Accessing elements within a [`RawArr`] is similar to element access in [`bson::Document`], /// but because the contents are parsed during iteration instead of at creation time, format errors /// can happen at any time during use. /// -/// Iterating over a [`RawArray`] yields either an error or a key-value pair that borrows from the +/// Iterating over a [`RawArr`] yields either an error or a key-value pair that borrows from the /// original document without making any additional allocations. 
/// /// ``` @@ -42,9 +42,9 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// # Ok::<(), Error>(()) /// ``` /// -/// Individual elements can be accessed using [`RawArray::get`] or any of -/// the type-specific getters, such as [`RawArray::get_object_id`] or -/// [`RawArray::get_str`]. Note that accessing elements is an O(N) operation, as it +/// Individual elements can be accessed using [`RawArr::get`] or any of +/// the type-specific getters, such as [`RawArr::get_object_id`] or +/// [`RawArr::get_str`]. Note that accessing elements is an O(N) operation, as it /// requires iterating through the array from the beginning to find the requested index. /// /// ``` @@ -64,22 +64,22 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// ``` #[derive(Debug)] #[repr(transparent)] -pub struct RawArray { +pub struct RawArr { pub(crate) doc: RawDoc, } -impl RawArray { - pub(crate) fn from_doc(doc: &RawDoc) -> &RawArray { +impl RawArr { + pub(crate) fn from_doc(doc: &RawDoc) -> &RawArr { // SAFETY: // // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is // null, dangling, or misaligned. We know the pointer is not null or dangling due to the // fact that it's created by a safe reference. Converting &RawDoc to *const // RawDoc will be properly aligned due to them being references to the same type, - // and converting *const RawDoc to *const RawArray is aligned due to the fact that - // the only field in a RawArray is a RawDoc, meaning the structs are represented + // and converting *const RawDoc to *const RawArr is aligned due to the fact that + // the only field in a RawArr is a RawDoc, meaning the structs are represented // identically at the byte level. - unsafe { &*(doc as *const RawDoc as *const RawArray) } + unsafe { &*(doc as *const RawDoc as *const RawArr) } } /// Gets a reference to the value at the given index. 
@@ -135,7 +135,7 @@ impl RawArray { /// Gets a reference to the array at the given index or returns an error if the /// value at that index isn't a array. - pub fn get_array(&self, index: usize) -> ValueAccessResult<&RawArray> { + pub fn get_array(&self, index: usize) -> ValueAccessResult<&RawArr> { self.get_with(index, ElementType::Array, RawBson::as_array) } @@ -187,16 +187,16 @@ impl RawArray { self.get_with(index, ElementType::Int64, RawBson::as_i64) } - /// Gets a reference to the raw bytes of the RawArray. + /// Gets a reference to the raw bytes of the RawArr. pub fn as_bytes(&self) -> &[u8] { self.doc.as_bytes() } } -impl TryFrom<&RawArray> for Vec { +impl TryFrom<&RawArr> for Vec { type Error = Error; - fn try_from(arr: &RawArray) -> Result> { + fn try_from(arr: &RawArr) -> Result> { arr.into_iter() .map(|result| { let rawbson = result?; @@ -206,23 +206,23 @@ impl TryFrom<&RawArray> for Vec { } } -impl<'a> IntoIterator for &'a RawArray { - type IntoIter = RawArrayIter<'a>; +impl<'a> IntoIterator for &'a RawArr { + type IntoIter = RawArrIter<'a>; type Item = Result>; - fn into_iter(self) -> RawArrayIter<'a> { - RawArrayIter { + fn into_iter(self) -> RawArrIter<'a> { + RawArrIter { inner: self.doc.into_iter(), } } } /// An iterator over borrowed raw BSON array values. 
-pub struct RawArrayIter<'a> { +pub struct RawArrIter<'a> { inner: Iter<'a>, } -impl<'a> Iterator for RawArrayIter<'a> { +impl<'a> Iterator for RawArrIter<'a> { type Item = Result>; fn next(&mut self) -> Option>> { diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 50f9f9d6..109d90f0 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -26,7 +26,7 @@ use super::{ read_lenencoded, read_nullterminated, Error, - RawArray, + RawArr, RawBinary, RawBson, RawRegex, @@ -484,7 +484,7 @@ impl RawDoc { /// assert!(doc.get_array("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get_array<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a RawArray> { + pub fn get_array<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a RawArr> { self.get_with(key, ElementType::Array, RawBson::as_array) } @@ -859,10 +859,7 @@ impl<'a> Iterator for Iter<'a> { } ElementType::Array => { let doc = self.next_document(valueoffset)?; - ( - RawBson::Array(RawArray::from_doc(doc)), - doc.as_bytes().len(), - ) + (RawBson::Array(RawArr::from_doc(doc)), doc.as_bytes().len()) } ElementType::Binary => { let len = i32_from_slice(&self.doc.data[valueoffset..])? as usize; diff --git a/src/raw/elem.rs b/src/raw/elem.rs index cec88af9..a8f90bda 100644 --- a/src/raw/elem.rs +++ b/src/raw/elem.rs @@ -1,6 +1,6 @@ use std::convert::{TryFrom, TryInto}; -use super::{Error, RawArray, RawDoc, Result}; +use super::{Error, RawArr, RawDoc, Result}; use crate::{ oid::{self, ObjectId}, spec::{BinarySubtype, ElementType}, @@ -18,7 +18,7 @@ pub enum RawBson<'a> { /// UTF-8 string String(&'a str), /// Array - Array(&'a RawArray), + Array(&'a RawArr), /// Embedded document Document(&'a RawDoc), /// Boolean value @@ -103,9 +103,9 @@ impl<'a> RawBson<'a> { } } - /// Gets the [`&RawArray`] that's referenced or returns `None` if the referenced value isn't a + /// Gets the [`&RawArr`] that's referenced or returns `None` if the referenced value isn't a /// BSON array. 
- pub fn as_array(self) -> Option<&'a RawArray> { + pub fn as_array(self) -> Option<&'a RawArr> { match self { RawBson::Array(v) => Some(v), _ => None, diff --git a/src/raw/mod.rs b/src/raw/mod.rs index c21ad617..6c20ed0d 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -5,9 +5,9 @@ //! [`Document`] type in that their storage is BSON bytes rather than a hash-map like Rust type. In //! certain circumstances, these types can be leveraged for increased performance. //! -//! This module also provides a [`RawBson`] type for modeling any borrowed BSON element and a [`RawArray`] type -//! for modeling a borrowed slice of a document containing a BSON array element. -//! +//! This module also provides a [`RawBson`] type for modeling any borrowed BSON element and a +//! [`RawArr`] type for modeling a borrowed slice of a document containing a BSON array element. +//! //! A [`RawDocument`] can be created from a `Vec` containing raw BSON data, and elements //! accessed via methods similar to those available on the [`Document`] type. Note that //! [`RawDocument::get`] returns a [`raw::Result>`], since the bytes contained in @@ -47,7 +47,7 @@ //! "cruel": "world" //! } //! }; -//! +//! //! let raw = RawDocument::from_document(&document)?; //! let value: Option<&str> = raw //! .get_document("goodbye")? @@ -61,7 +61,7 @@ //! ); //! # Ok::<(), bson::raw::Error>(()) //! ``` -//! +//! //! ### Reference types ([`RawDoc`]) //! //! A BSON document can also be accessed with the [`RawDoc`] type, which is an @@ -78,7 +78,7 @@ //! assert_eq!(RawDoc::new(bytes)?.get_str("hi")?, Some("y'all")); //! # Ok::<(), bson::raw::Error>(()) //! ``` -//! +//! //! ### Iteration //! //! 
[`RawDoc`] implements [`IntoIterator`](std::iter::IntoIterator), which can also be @@ -123,8 +123,8 @@ use std::convert::{TryFrom, TryInto}; use crate::de::MIN_BSON_STRING_SIZE; pub use self::{ - array::{RawArray, RawArrayIter}, - doc::{Iter, RawDocument, RawDoc}, + array::{RawArr, RawArrIter}, + doc::{Iter, RawDoc, RawDocument}, elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex}, error::{Error, ErrorKind, Result}, }; From f67f9b1ebe3e83f1103c6db04d348795731fcd71 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 17:04:14 -0400 Subject: [PATCH 35/48] split code into different files --- src/raw/{elem.rs => bson.rs} | 0 src/raw/doc.rs | 485 +---------------------------------- src/raw/document.rs | 177 +++++++++++++ src/raw/iter.rs | 305 ++++++++++++++++++++++ src/raw/mod.rs | 10 +- 5 files changed, 503 insertions(+), 474 deletions(-) rename src/raw/{elem.rs => bson.rs} (100%) create mode 100644 src/raw/document.rs create mode 100644 src/raw/iter.rs diff --git a/src/raw/elem.rs b/src/raw/bson.rs similarity index 100% rename from src/raw/elem.rs rename to src/raw/bson.rs diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 109d90f0..dc30342c 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -1,220 +1,25 @@ use std::{ - borrow::{Borrow, Cow}, + borrow::Cow, convert::{TryFrom, TryInto}, ops::Deref, }; -use crate::{ - de::{read_bool, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE}, - raw::{ - checked_add, - elem::RawDbPointer, - error::{try_with_key, ErrorKind}, - f64_from_slice, - i64_from_slice, - RawJavaScriptCodeWithScope, - }, - spec::BinarySubtype, - DateTime, - Decimal128, - Timestamp, -}; +use crate::{raw::error::ErrorKind, DateTime, Timestamp}; use super::{ error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, i32_from_slice, - read_lenencoded, - read_nullterminated, Error, + Iter, RawArr, RawBinary, RawBson, + RawDocument, RawRegex, Result, }; use crate::{oid::ObjectId, spec::ElementType, Document}; -/// A BSON 
document, stored as raw bytes on the heap. This can be created from a `Vec` or -/// a [`bson::Document`]. -/// -/// Accessing elements within a [`RawDocument`] is similar to element access in [`bson::Document`], -/// but because the contents are parsed during iteration instead of at creation time, format errors -/// can happen at any time during use. -/// -/// Iterating over a [`RawDocument`] yields either an error or a key-value pair that borrows from -/// the original document without making any additional allocations. -/// -/// ``` -/// # use bson::raw::{RawDocument, Error}; -/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -/// let mut iter = doc.iter(); -/// let (key, value) = iter.next().unwrap()?; -/// assert_eq!(key, "hi"); -/// assert_eq!(value.as_str(), Ok("y'all")); -/// assert!(iter.next().is_none()); -/// # Ok::<(), Error>(()) -/// ``` -/// -/// This type implements `Deref` to `RawDoc`, meaning that all methods on `RawDoc` slices are -/// available on `RawDocument` values as well. This includes [`RawDoc::get`] or any of the -/// type-specific getters, such as [`RawDoc::get_object_id`] or [`RawDoc::get_str`]. Note that -/// accessing elements is an O(N) operation, as it requires iterating through the document from the -/// beginning to find the requested key. -/// -/// ``` -/// # use bson::raw::{RawDocument, Error}; -/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -/// assert_eq!(doc.get_str("hi")?, Some("y'all")); -/// # Ok::<(), Error>(()) -/// ``` -#[derive(Clone, Debug)] -pub struct RawDocument { - data: Vec, -} - -impl RawDocument { - /// Constructs a new RawDocument, validating _only_ the - /// following invariants: - /// * `data` is at least five bytes long (the minimum for a valid BSON document) - /// * the initial four bytes of `data` accurately represent the length of the bytes as - /// required by the BSON spec. 
- /// * the last byte of `data` is a 0 - /// - /// Note that the internal structure of the bytes representing the - /// BSON elements is _not_ validated at all by this method. If the - /// bytes do not conform to the BSON spec, then method calls on - /// the RawDocument will return Errors where appropriate. - /// - /// ``` - /// # use bson::raw::{RawDocument, Error}; - /// let doc = RawDocument::new(b"\x05\0\0\0\0".to_vec())?; - /// # Ok::<(), Error>(()) - /// ``` - pub fn new(data: Vec) -> Result { - let _ = RawDoc::new(data.as_slice())?; - Ok(Self { data }) - } - - /// Create a RawDocument from a Document. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, oid::ObjectId, raw::RawDocument}; - /// - /// let document = doc! { - /// "_id": ObjectId::new(), - /// "name": "Herman Melville", - /// "title": "Moby-Dick", - /// }; - /// let doc = RawDocument::from_document(&document)?; - /// # Ok::<(), Error>(()) - /// ``` - pub fn from_document(doc: &Document) -> Result { - let mut data = Vec::new(); - doc.to_writer(&mut data).map_err(|e| Error { - key: None, - kind: ErrorKind::MalformedValue { - message: e.to_string(), - }, - })?; - - Ok(Self { data }) - } - - /// Gets an iterator over the elements in the `RawDocument`, which yields `Result<&str, - /// Element<'_>>`. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::RawDocument}; - /// - /// let doc = RawDocument::from_document(&doc! { "ferris": true })?; - /// - /// for element in doc.iter() { - /// let (key, value) = element?; - /// assert_eq!(key, "ferris"); - /// assert_eq!(value.as_bool()?, true); - /// } - /// # Ok::<(), Error>(()) - /// ``` - /// - /// # Note: - /// - /// There is no owning iterator for RawDocument. If you need ownership over - /// elements that might need to allocate, you must explicitly convert - /// them to owned types yourself. 
- pub fn iter(&self) -> Iter<'_> { - self.into_iter() - } - - /// Return the contained data as a `Vec` - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::RawDocument}; - /// - /// let doc = RawDocument::from_document(&doc!{})?; - /// assert_eq!(doc.into_vec(), b"\x05\x00\x00\x00\x00".to_vec()); - /// # Ok::<(), Error>(()) - /// ``` - pub fn into_vec(self) -> Vec { - self.data - } -} - -impl<'a> From for Cow<'a, RawDoc> { - fn from(rd: RawDocument) -> Self { - Cow::Owned(rd) - } -} - -impl<'a> From<&'a RawDocument> for Cow<'a, RawDoc> { - fn from(rd: &'a RawDocument) -> Self { - Cow::Borrowed(rd.as_ref()) - } -} - -impl TryFrom for Document { - type Error = Error; - - fn try_from(raw: RawDocument) -> Result { - Document::try_from(raw.as_ref()) - } -} - -impl<'a> IntoIterator for &'a RawDocument { - type IntoIter = Iter<'a>; - type Item = Result<(&'a str, RawBson<'a>)>; - - fn into_iter(self) -> Iter<'a> { - Iter { - doc: &self, - offset: 4, - valid: true, - } - } -} - -impl AsRef for RawDocument { - fn as_ref(&self) -> &RawDoc { - RawDoc::new_unchecked(&self.data) - } -} - -impl Borrow for RawDocument { - fn borrow(&self) -> &RawDoc { - &*self - } -} - -impl ToOwned for RawDoc { - type Owned = RawDocument; - - fn to_owned(&self) -> Self::Owned { - self.to_raw_document() - } -} - /// A slice of a BSON document (akin to [`std::str`]). This can be created from a /// [`RawDocument`] or any type that contains valid BSON data, including static binary literals, /// [Vec](std::vec::Vec), or arrays. @@ -314,7 +119,7 @@ impl RawDoc { } /// Creates a new Doc referencing the provided data slice. 
- fn new_unchecked + ?Sized>(data: &D) -> &RawDoc { + pub(crate) fn new_unchecked + ?Sized>(data: &D) -> &RawDoc { // SAFETY: // // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is @@ -697,6 +502,14 @@ impl Deref for RawDocument { } } +impl ToOwned for RawDoc { + type Owned = RawDocument; + + fn to_owned(&self) -> Self::Owned { + self.to_raw_document() + } +} + impl<'a> From<&'a RawDoc> for Cow<'a, RawDoc> { fn from(rdr: &'a RawDoc) -> Self { Cow::Borrowed(rdr) @@ -719,276 +532,6 @@ impl<'a> IntoIterator for &'a RawDoc { type Item = Result<(&'a str, RawBson<'a>)>; fn into_iter(self) -> Iter<'a> { - Iter { - doc: self, - offset: 4, - valid: true, - } - } -} - -/// An iterator over the document's entries. -pub struct Iter<'a> { - doc: &'a RawDoc, - offset: usize, - - /// Whether the underlying doc is assumed to be valid or if an error has been encountered. - /// After an error, all subsequent iterations will return None. - valid: bool, -} - -impl<'a> Iter<'a> { - fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> { - let end = checked_add(start, num_bytes)?; - if self.doc.data.get(start..end).is_none() { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: format!( - "length exceeds remaining length of buffer: {} vs {}", - num_bytes, - self.doc.data.len() - start - ), - })); - } - Ok(()) - } - - fn next_oid(&self, starting_at: usize) -> Result { - self.verify_enough_bytes(starting_at, 12)?; - let oid = ObjectId::from_bytes( - self.doc.data[starting_at..(starting_at + 12)] - .try_into() - .unwrap(), // ok because we know slice is 12 bytes long - ); - Ok(oid) - } - - fn next_document(&self, starting_at: usize) -> Result<&'a RawDoc> { - self.verify_enough_bytes(starting_at, MIN_BSON_DOCUMENT_SIZE as usize)?; - let size = i32_from_slice(&self.doc.data[starting_at..])? 
as usize; - - if size < MIN_BSON_DOCUMENT_SIZE as usize { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: format!("document too small: {} bytes", size), - })); - } - - self.verify_enough_bytes(starting_at, size)?; - let end = starting_at + size; - - if self.doc.data[end - 1] != 0 { - return Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "not null terminated".into(), - }, - }); - } - RawDoc::new(&self.doc.data[starting_at..end]) - } -} - -impl<'a> Iterator for Iter<'a> { - type Item = Result<(&'a str, RawBson<'a>)>; - - fn next(&mut self) -> Option)>> { - if !self.valid { - return None; - } else if self.offset == self.doc.data.len() - 1 { - if self.doc.data[self.offset] == 0 { - // end of document marker - return None; - } else { - self.valid = false; - return Some(Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "document not null terminated".into(), - }, - })); - } - } else if self.offset >= self.doc.data.len() { - self.valid = false; - return Some(Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "iteration overflowed document".to_string(), - }))); - } - - let key = match read_nullterminated(&self.doc.data[self.offset + 1..]) { - Ok(k) => k, - Err(e) => { - self.valid = false; - return Some(Err(e)); - } - }; - - let kvp_result = try_with_key(key, || { - let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 - - let element_type = match ElementType::from(self.doc.data[self.offset]) { - Some(et) => et, - None => { - return Err(Error::new_with_key( - key, - ErrorKind::MalformedValue { - message: format!("invalid tag: {}", self.doc.data[self.offset]), - }, - )) - } - }; - - let (element, element_size) = match element_type { - ElementType::Int32 => { - let i = i32_from_slice(&self.doc.data[valueoffset..])?; - (RawBson::Int32(i), 4) - } - ElementType::Int64 => { - let i = i64_from_slice(&self.doc.data[valueoffset..])?; - (RawBson::Int64(i), 8) - } - 
ElementType::Double => { - let f = f64_from_slice(&self.doc.data[valueoffset..])?; - (RawBson::Double(f), 8) - } - ElementType::String => { - let s = read_lenencoded(&self.doc.data[valueoffset..])?; - (RawBson::String(s), 4 + s.len() + 1) - } - ElementType::EmbeddedDocument => { - let doc = self.next_document(valueoffset)?; - (RawBson::Document(doc), doc.as_bytes().len()) - } - ElementType::Array => { - let doc = self.next_document(valueoffset)?; - (RawBson::Array(RawArr::from_doc(doc)), doc.as_bytes().len()) - } - ElementType::Binary => { - let len = i32_from_slice(&self.doc.data[valueoffset..])? as usize; - let data_start = valueoffset + 4 + 1; - self.verify_enough_bytes(valueoffset, len)?; - let subtype = BinarySubtype::from(self.doc.data[valueoffset + 4]); - let data = match subtype { - BinarySubtype::BinaryOld => { - if len < 4 { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "old binary subtype has no inner declared length" - .into(), - })); - } - let oldlength = i32_from_slice(&self.doc.data[data_start..])? as usize; - if checked_add(oldlength, 4)? 
!= len { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "old binary subtype has wrong inner declared length" - .into(), - })); - } - &self.doc.data[(data_start + 4)..(data_start + len)] - } - _ => &self.doc.data[data_start..(data_start + len)], - }; - (RawBson::Binary(RawBinary { subtype, data }), 4 + 1 + len) - } - ElementType::ObjectId => { - let oid = self.next_oid(valueoffset)?; - (RawBson::ObjectId(oid), 12) - } - ElementType::Boolean => { - let b = read_bool(&self.doc.data[valueoffset..]).map_err(|e| { - Error::new_with_key( - key, - ErrorKind::MalformedValue { - message: e.to_string(), - }, - ) - })?; - (RawBson::Boolean(b), 1) - } - ElementType::DateTime => { - let ms = i64_from_slice(&self.doc.data[valueoffset..])?; - (RawBson::DateTime(DateTime::from_millis(ms)), 8) - } - ElementType::RegularExpression => { - let pattern = read_nullterminated(&self.doc.data[valueoffset..])?; - let options = - read_nullterminated(&self.doc.data[(valueoffset + pattern.len() + 1)..])?; - ( - RawBson::RegularExpression(RawRegex { pattern, options }), - pattern.len() + 1 + options.len() + 1, - ) - } - ElementType::Null => (RawBson::Null, 0), - ElementType::Undefined => (RawBson::Undefined, 0), - ElementType::Timestamp => { - let ts = - Timestamp::from_reader(&self.doc.data[valueoffset..]).map_err(|e| { - Error::new_without_key(ErrorKind::MalformedValue { - message: e.to_string(), - }) - })?; - (RawBson::Timestamp(ts), 8) - } - ElementType::JavaScriptCode => { - let code = read_lenencoded(&self.doc.data[valueoffset..])?; - (RawBson::JavaScriptCode(code), 4 + code.len() + 1) - } - ElementType::JavaScriptCodeWithScope => { - let length = i32_from_slice(&self.doc.data[valueoffset..])? 
as usize; - - if length < MIN_CODE_WITH_SCOPE_SIZE as usize { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "code with scope length too small".to_string(), - })); - } - - self.verify_enough_bytes(valueoffset, length)?; - let slice = &self.doc.data[valueoffset..(valueoffset + length)]; - let code = read_lenencoded(&slice[4..])?; - let scope_start = 4 + 4 + code.len() + 1; - let scope = RawDoc::new(&slice[scope_start..])?; - ( - RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope { - code, - scope, - }), - length, - ) - } - ElementType::DbPointer => { - let namespace = read_lenencoded(&self.doc.data[valueoffset..])?; - let id = self.next_oid(valueoffset + 4 + namespace.len() + 1)?; - ( - RawBson::DbPointer(RawDbPointer { namespace, id }), - 4 + namespace.len() + 1 + 12, - ) - } - ElementType::Symbol => { - let s = read_lenencoded(&self.doc.data[valueoffset..])?; - (RawBson::Symbol(s), 4 + s.len() + 1) - } - ElementType::Decimal128 => { - self.verify_enough_bytes(valueoffset, 16)?; - ( - RawBson::Decimal128(Decimal128::from_bytes( - self.doc.data[valueoffset..(valueoffset + 16)] - .try_into() - .unwrap(), - )), - 16, - ) - } - ElementType::MinKey => (RawBson::MinKey, 0), - ElementType::MaxKey => (RawBson::MaxKey, 0), - }; - - self.offset = valueoffset + element_size; - self.verify_enough_bytes(valueoffset, element_size)?; - - Ok((key, element)) - }); - - if kvp_result.is_err() { - self.valid = false; - } - - Some(kvp_result) + Iter::new(self) } } diff --git a/src/raw/document.rs b/src/raw/document.rs new file mode 100644 index 00000000..7e2448fc --- /dev/null +++ b/src/raw/document.rs @@ -0,0 +1,177 @@ +use std::{ + borrow::{Borrow, Cow}, + convert::TryFrom, +}; + +use crate::Document; + +use super::{Error, ErrorKind, Iter, RawBson, RawDoc, Result}; + +/// A BSON document, stored as raw bytes on the heap. This can be created from a `Vec` or +/// a [`bson::Document`]. 
+/// +/// Accessing elements within a [`RawDocument`] is similar to element access in [`bson::Document`], +/// but because the contents are parsed during iteration instead of at creation time, format errors +/// can happen at any time during use. +/// +/// Iterating over a [`RawDocument`] yields either an error or a key-value pair that borrows from +/// the original document without making any additional allocations. +/// +/// ``` +/// # use bson::raw::{RawDocument, Error}; +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// let mut iter = doc.iter(); +/// let (key, value) = iter.next().unwrap()?; +/// assert_eq!(key, "hi"); +/// assert_eq!(value.as_str(), Ok("y'all")); +/// assert!(iter.next().is_none()); +/// # Ok::<(), Error>(()) +/// ``` +/// +/// This type implements `Deref` to `RawDoc`, meaning that all methods on `RawDoc` slices are +/// available on `RawDocument` values as well. This includes [`RawDoc::get`] or any of the +/// type-specific getters, such as [`RawDoc::get_object_id`] or [`RawDoc::get_str`]. Note that +/// accessing elements is an O(N) operation, as it requires iterating through the document from the +/// beginning to find the requested key. +/// +/// ``` +/// # use bson::raw::{RawDocument, Error}; +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// assert_eq!(doc.get_str("hi")?, Some("y'all")); +/// # Ok::<(), Error>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct RawDocument { + pub(crate) data: Vec, +} + +impl RawDocument { + /// Constructs a new RawDocument, validating _only_ the + /// following invariants: + /// * `data` is at least five bytes long (the minimum for a valid BSON document) + /// * the initial four bytes of `data` accurately represent the length of the bytes as + /// required by the BSON spec. 
+ /// * the last byte of `data` is a 0 + /// + /// Note that the internal structure of the bytes representing the + /// BSON elements is _not_ validated at all by this method. If the + /// bytes do not conform to the BSON spec, then method calls on + /// the RawDocument will return Errors where appropriate. + /// + /// ``` + /// # use bson::raw::{RawDocument, Error}; + /// let doc = RawDocument::new(b"\x05\0\0\0\0".to_vec())?; + /// # Ok::<(), Error>(()) + /// ``` + pub fn new(data: Vec) -> Result { + let _ = RawDoc::new(data.as_slice())?; + Ok(Self { data }) + } + + /// Create a RawDocument from a Document. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::RawDocument}; + /// + /// let document = doc! { + /// "_id": ObjectId::new(), + /// "name": "Herman Melville", + /// "title": "Moby-Dick", + /// }; + /// let doc = RawDocument::from_document(&document)?; + /// # Ok::<(), Error>(()) + /// ``` + pub fn from_document(doc: &Document) -> Result { + let mut data = Vec::new(); + doc.to_writer(&mut data).map_err(|e| Error { + key: None, + kind: ErrorKind::MalformedValue { + message: e.to_string(), + }, + })?; + + Ok(Self { data }) + } + + /// Gets an iterator over the elements in the `RawDocument`, which yields `Result<&str, + /// Element<'_>>`. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocument}; + /// + /// let doc = RawDocument::from_document(&doc! { "ferris": true })?; + /// + /// for element in doc.iter() { + /// let (key, value) = element?; + /// assert_eq!(key, "ferris"); + /// assert_eq!(value.as_bool()?, true); + /// } + /// # Ok::<(), Error>(()) + /// ``` + /// + /// # Note: + /// + /// There is no owning iterator for RawDocument. If you need ownership over + /// elements that might need to allocate, you must explicitly convert + /// them to owned types yourself. 
+ pub fn iter(&self) -> Iter<'_> { + self.into_iter() + } + + /// Return the contained data as a `Vec` + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocument}; + /// + /// let doc = RawDocument::from_document(&doc!{})?; + /// assert_eq!(doc.into_vec(), b"\x05\x00\x00\x00\x00".to_vec()); + /// # Ok::<(), Error>(()) + /// ``` + pub fn into_vec(self) -> Vec { + self.data + } +} + +impl<'a> From for Cow<'a, RawDoc> { + fn from(rd: RawDocument) -> Self { + Cow::Owned(rd) + } +} + +impl<'a> From<&'a RawDocument> for Cow<'a, RawDoc> { + fn from(rd: &'a RawDocument) -> Self { + Cow::Borrowed(rd.as_ref()) + } +} + +impl TryFrom for Document { + type Error = Error; + + fn try_from(raw: RawDocument) -> Result { + Document::try_from(raw.as_ref()) + } +} + +impl<'a> IntoIterator for &'a RawDocument { + type IntoIter = Iter<'a>; + type Item = Result<(&'a str, RawBson<'a>)>; + + fn into_iter(self) -> Iter<'a> { + Iter::new(self) + } +} + +impl AsRef for RawDocument { + fn as_ref(&self) -> &RawDoc { + RawDoc::new_unchecked(&self.data) + } +} + +impl Borrow for RawDocument { + fn borrow(&self) -> &RawDoc { + &*self + } +} diff --git a/src/raw/iter.rs b/src/raw/iter.rs new file mode 100644 index 00000000..7093b5d5 --- /dev/null +++ b/src/raw/iter.rs @@ -0,0 +1,305 @@ +use std::convert::TryInto; + +use crate::{ + de::{read_bool, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE}, + oid::ObjectId, + raw::{Error, ErrorKind, Result}, + spec::{BinarySubtype, ElementType}, + DateTime, + Decimal128, + Timestamp, +}; + +use super::{ + bson::RawDbPointer, + checked_add, + error::try_with_key, + f64_from_slice, + i32_from_slice, + i64_from_slice, + read_lenencoded, + read_nullterminated, + RawArr, + RawBinary, + RawBson, + RawDoc, + RawJavaScriptCodeWithScope, + RawRegex, +}; + +/// An iterator over the document's entries. 
+pub struct Iter<'a> { + doc: &'a RawDoc, + offset: usize, + + /// Whether the underlying doc is assumed to be valid or if an error has been encountered. + /// After an error, all subsequent iterations will return None. + valid: bool, +} + +impl<'a> Iter<'a> { + pub(crate) fn new(doc: &'a RawDoc) -> Self { + Self { + doc, + offset: 4, + valid: false, + } + } + + fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> { + let end = checked_add(start, num_bytes)?; + if self.doc.as_bytes().get(start..end).is_none() { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: format!( + "length exceeds remaining length of buffer: {} vs {}", + num_bytes, + self.doc.as_bytes().len() - start + ), + })); + } + Ok(()) + } + + fn next_oid(&self, starting_at: usize) -> Result { + self.verify_enough_bytes(starting_at, 12)?; + let oid = ObjectId::from_bytes( + self.doc.as_bytes()[starting_at..(starting_at + 12)] + .try_into() + .unwrap(), // ok because we know slice is 12 bytes long + ); + Ok(oid) + } + + fn next_document(&self, starting_at: usize) -> Result<&'a RawDoc> { + self.verify_enough_bytes(starting_at, MIN_BSON_DOCUMENT_SIZE as usize)?; + let size = i32_from_slice(&self.doc.as_bytes()[starting_at..])? 
as usize; + + if size < MIN_BSON_DOCUMENT_SIZE as usize { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: format!("document too small: {} bytes", size), + })); + } + + self.verify_enough_bytes(starting_at, size)?; + let end = starting_at + size; + + if self.doc.as_bytes()[end - 1] != 0 { + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "not null terminated".into(), + }, + }); + } + RawDoc::new(&self.doc.as_bytes()[starting_at..end]) + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = Result<(&'a str, RawBson<'a>)>; + + fn next(&mut self) -> Option)>> { + if !self.valid { + return None; + } else if self.offset == self.doc.as_bytes().len() - 1 { + if self.doc.as_bytes()[self.offset] == 0 { + // end of document marker + return None; + } else { + self.valid = false; + return Some(Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "document not null terminated".into(), + }, + })); + } + } else if self.offset >= self.doc.as_bytes().len() { + self.valid = false; + return Some(Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "iteration overflowed document".to_string(), + }))); + } + + let key = match read_nullterminated(&self.doc.as_bytes()[self.offset + 1..]) { + Ok(k) => k, + Err(e) => { + self.valid = false; + return Some(Err(e)); + } + }; + + let kvp_result = try_with_key(key, || { + let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 + + let element_type = match ElementType::from(self.doc.as_bytes()[self.offset]) { + Some(et) => et, + None => { + return Err(Error::new_with_key( + key, + ErrorKind::MalformedValue { + message: format!("invalid tag: {}", self.doc.as_bytes()[self.offset]), + }, + )) + } + }; + + let (element, element_size) = match element_type { + ElementType::Int32 => { + let i = i32_from_slice(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::Int32(i), 4) + } + ElementType::Int64 => { + let i = 
i64_from_slice(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::Int64(i), 8) + } + ElementType::Double => { + let f = f64_from_slice(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::Double(f), 8) + } + ElementType::String => { + let s = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::String(s), 4 + s.len() + 1) + } + ElementType::EmbeddedDocument => { + let doc = self.next_document(valueoffset)?; + (RawBson::Document(doc), doc.as_bytes().len()) + } + ElementType::Array => { + let doc = self.next_document(valueoffset)?; + (RawBson::Array(RawArr::from_doc(doc)), doc.as_bytes().len()) + } + ElementType::Binary => { + let len = i32_from_slice(&self.doc.as_bytes()[valueoffset..])? as usize; + let data_start = valueoffset + 4 + 1; + self.verify_enough_bytes(valueoffset, len)?; + let subtype = BinarySubtype::from(self.doc.as_bytes()[valueoffset + 4]); + let data = match subtype { + BinarySubtype::BinaryOld => { + if len < 4 { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "old binary subtype has no inner declared length" + .into(), + })); + } + let oldlength = + i32_from_slice(&self.doc.as_bytes()[data_start..])? as usize; + if checked_add(oldlength, 4)? 
!= len { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "old binary subtype has wrong inner declared length" + .into(), + })); + } + &self.doc.as_bytes()[(data_start + 4)..(data_start + len)] + } + _ => &self.doc.as_bytes()[data_start..(data_start + len)], + }; + (RawBson::Binary(RawBinary { subtype, data }), 4 + 1 + len) + } + ElementType::ObjectId => { + let oid = self.next_oid(valueoffset)?; + (RawBson::ObjectId(oid), 12) + } + ElementType::Boolean => { + let b = read_bool(&self.doc.as_bytes()[valueoffset..]).map_err(|e| { + Error::new_with_key( + key, + ErrorKind::MalformedValue { + message: e.to_string(), + }, + ) + })?; + (RawBson::Boolean(b), 1) + } + ElementType::DateTime => { + let ms = i64_from_slice(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::DateTime(DateTime::from_millis(ms)), 8) + } + ElementType::RegularExpression => { + let pattern = read_nullterminated(&self.doc.as_bytes()[valueoffset..])?; + let options = read_nullterminated( + &self.doc.as_bytes()[(valueoffset + pattern.len() + 1)..], + )?; + ( + RawBson::RegularExpression(RawRegex { pattern, options }), + pattern.len() + 1 + options.len() + 1, + ) + } + ElementType::Null => (RawBson::Null, 0), + ElementType::Undefined => (RawBson::Undefined, 0), + ElementType::Timestamp => { + let ts = Timestamp::from_reader(&self.doc.as_bytes()[valueoffset..]).map_err( + |e| { + Error::new_without_key(ErrorKind::MalformedValue { + message: e.to_string(), + }) + }, + )?; + (RawBson::Timestamp(ts), 8) + } + ElementType::JavaScriptCode => { + let code = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::JavaScriptCode(code), 4 + code.len() + 1) + } + ElementType::JavaScriptCodeWithScope => { + let length = i32_from_slice(&self.doc.as_bytes()[valueoffset..])? 
as usize; + + if length < MIN_CODE_WITH_SCOPE_SIZE as usize { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "code with scope length too small".to_string(), + })); + } + + self.verify_enough_bytes(valueoffset, length)?; + let slice = &&self.doc.as_bytes()[valueoffset..(valueoffset + length)]; + let code = read_lenencoded(&slice[4..])?; + let scope_start = 4 + 4 + code.len() + 1; + let scope = RawDoc::new(&slice[scope_start..])?; + ( + RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope { + code, + scope, + }), + length, + ) + } + ElementType::DbPointer => { + let namespace = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; + let id = self.next_oid(valueoffset + 4 + namespace.len() + 1)?; + ( + RawBson::DbPointer(RawDbPointer { namespace, id }), + 4 + namespace.len() + 1 + 12, + ) + } + ElementType::Symbol => { + let s = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::Symbol(s), 4 + s.len() + 1) + } + ElementType::Decimal128 => { + self.verify_enough_bytes(valueoffset, 16)?; + ( + RawBson::Decimal128(Decimal128::from_bytes( + self.doc.as_bytes()[valueoffset..(valueoffset + 16)] + .try_into() + .unwrap(), + )), + 16, + ) + } + ElementType::MinKey => (RawBson::MinKey, 0), + ElementType::MaxKey => (RawBson::MaxKey, 0), + }; + + self.offset = valueoffset + element_size; + self.verify_enough_bytes(valueoffset, element_size)?; + + Ok((key, element)) + }); + + if kvp_result.is_err() { + self.valid = false; + } + + Some(kvp_result) + } +} diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 6c20ed0d..ace0f4b4 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -112,9 +112,11 @@ //! 
``` mod array; +mod bson; mod doc; -mod elem; +mod document; mod error; +mod iter; #[cfg(test)] mod test; @@ -124,9 +126,11 @@ use crate::de::MIN_BSON_STRING_SIZE; pub use self::{ array::{RawArr, RawArrIter}, - doc::{Iter, RawDoc, RawDocument}, - elem::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex}, + bson::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex}, + doc::RawDoc, + document::RawDocument, error::{Error, ErrorKind, Result}, + iter::Iter, }; /// Given a u8 slice, return an i32 calculated from the first four bytes in From f71b02ea97beb1aa58316fb2848c7d39f9030bda Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 17:23:57 -0400 Subject: [PATCH 36/48] various cleanup --- src/raw/array.rs | 2 +- src/raw/bson.rs | 6 +++--- src/raw/doc.rs | 22 ++++++---------------- src/raw/document.rs | 15 ++++++++++++--- src/raw/error.rs | 16 ++++------------ 5 files changed, 26 insertions(+), 35 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index 387747d6..5de3f07a 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -62,7 +62,7 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// assert_eq!(rawarray.get_bool(1)?, true); /// # Ok::<(), ValueAccessError>(()) /// ``` -#[derive(Debug)] +#[derive(Debug, PartialEq)] #[repr(transparent)] pub struct RawArr { pub(crate) doc: RawDoc, diff --git a/src/raw/bson.rs b/src/raw/bson.rs index a8f90bda..8a1f65d5 100644 --- a/src/raw/bson.rs +++ b/src/raw/bson.rs @@ -11,7 +11,7 @@ use crate::{ }; /// A BSON value referencing raw bytes stored elsewhere. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum RawBson<'a> { /// 64-bit binary floating point Double(f64), @@ -316,7 +316,7 @@ impl<'a> RawBinary<'a> { } /// A BSON regex referencing raw bytes stored elsewhere. 
-#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq)] pub struct RawRegex<'a> { pub(super) pattern: &'a str, pub(super) options: &'a str, @@ -335,7 +335,7 @@ impl<'a> RawRegex<'a> { } /// A BSON "code with scope" value referencing raw bytes stored elsewhere. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq)] pub struct RawJavaScriptCodeWithScope<'a> { pub(crate) code: &'a str, pub(crate) scope: &'a RawDoc, diff --git a/src/raw/doc.rs b/src/raw/doc.rs index dc30342c..391366e9 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -1,7 +1,6 @@ use std::{ borrow::Cow, convert::{TryFrom, TryInto}, - ops::Deref, }; use crate::{raw::error::ErrorKind, DateTime, Timestamp}; @@ -54,11 +53,11 @@ use crate::{oid::ObjectId, spec::ElementType, Document}; /// /// ``` /// # use bson::raw::{RawDocument, Error}; -/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// let doc = RawDoc::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; /// assert_eq!(doc.get_str("hi")?, Some("y'all")); /// # Ok::<(), Error>(()) /// ``` -#[derive(Debug)] +#[derive(Debug, PartialEq)] #[repr(transparent)] pub struct RawDoc { data: [u8], @@ -127,8 +126,8 @@ impl RawDoc { // fact that it's created by a safe reference. Converting &[u8] to *const [u8] will be // properly aligned due to them being references to the same type, and converting *const // [u8] to *const RawDoc is aligned due to the fact that the only field in a - // RawDoc is a [u8], meaning the structs are represented identically at the byte - // level. + // RawDoc is a [u8] and it is #[repr(transparent), meaning the structs are represented + // identically at the byte level. 
unsafe { &*(data.as_ref() as *const [u8] as *const RawDoc) } } @@ -142,9 +141,8 @@ impl RawDoc { /// let doc: RawDocument = doc_ref.to_raw_document(); /// # Ok::<(), Error>(()) pub fn to_raw_document(&self) -> RawDocument { - RawDocument { - data: self.data.to_owned(), - } + // unwrap is ok here because we already verified the bytes in `RawDocumentRef::new` + RawDocument::new(self.data.to_owned()).unwrap() } /// Gets a reference to the value corresponding to the given key by iterating until the key is @@ -494,14 +492,6 @@ impl AsRef for RawDoc { } } -impl Deref for RawDocument { - type Target = RawDoc; - - fn deref(&self) -> &Self::Target { - RawDoc::new_unchecked(&self.data) - } -} - impl ToOwned for RawDoc { type Owned = RawDocument; diff --git a/src/raw/document.rs b/src/raw/document.rs index 7e2448fc..b36f6078 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -1,6 +1,7 @@ use std::{ borrow::{Borrow, Cow}, convert::TryFrom, + ops::Deref, }; use crate::Document; @@ -40,9 +41,9 @@ use super::{Error, ErrorKind, Iter, RawBson, RawDoc, Result}; /// assert_eq!(doc.get_str("hi")?, Some("y'all")); /// # Ok::<(), Error>(()) /// ``` -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct RawDocument { - pub(crate) data: Vec, + data: Vec, } impl RawDocument { @@ -113,7 +114,7 @@ impl RawDocument { /// /// # Note: /// - /// There is no owning iterator for RawDocument. If you need ownership over + /// There is no owning iterator for [`RawDocument`]. If you need ownership over /// elements that might need to allocate, you must explicitly convert /// them to owned types yourself. 
pub fn iter(&self) -> Iter<'_> { @@ -170,6 +171,14 @@ impl AsRef for RawDocument { } } +impl Deref for RawDocument { + type Target = RawDoc; + + fn deref(&self) -> &Self::Target { + RawDoc::new_unchecked(&self.data) + } +} + impl Borrow for RawDocument { fn borrow(&self) -> &RawDoc { &*self diff --git a/src/raw/error.rs b/src/raw/error.rs index 04b6a6da..d6516380 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -41,13 +41,6 @@ pub(crate) fn try_with_key Result>(key: impl AsRef, f: #[derive(Clone, Debug, PartialEq)] #[non_exhaustive] pub enum ErrorKind { - /// A BSON value did not fit the expected type. - #[non_exhaustive] - UnexpectedType { - actual: ElementType, - expected: ElementType, - }, - /// A BSON value did not fit the proper format. #[non_exhaustive] MalformedValue { message: String }, @@ -67,11 +60,6 @@ impl std::fmt::Display for Error { let prefix = p.as_ref().map_or("", |p| p.as_str()); match &self.kind { - ErrorKind::UnexpectedType { actual, expected } => write!( - f, - "{} unexpected element type: {:?}, expected: {:?}", - prefix, actual, expected - ), ErrorKind::MalformedValue { message } => { write!(f, "{}malformed value: {:?}", prefix, message) } @@ -97,6 +85,7 @@ pub struct ValueAccessError { pub key: String, } +/// The type of error encountered when using a direct getter (e.g. [`RawDoc::get_str`]). #[derive(Debug, PartialEq, Clone)] #[non_exhaustive] pub enum ValueAccessErrorKind { @@ -106,7 +95,10 @@ pub enum ValueAccessErrorKind { /// Found a Bson value with the specified key, but not with the expected type #[non_exhaustive] UnexpectedType { + /// The type that was expected. expected: ElementType, + + /// The actual type that was encountered. 
actual: ElementType, }, From 1ef11d50bbec0dc93197569a5e5ebfc692836681 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 17:36:52 -0400 Subject: [PATCH 37/48] add debug impl for rawdoc wrappers --- src/raw/array.rs | 10 +++++++++- src/raw/bson.rs | 9 ++++++--- src/raw/doc.rs | 10 +++++++++- src/raw/document.rs | 10 +++++++++- src/raw/iter.rs | 10 ++++++++-- src/raw/test/mod.rs | 2 +- 6 files changed, 42 insertions(+), 9 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index 5de3f07a..5873bb87 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -62,7 +62,7 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// assert_eq!(rawarray.get_bool(1)?, true); /// # Ok::<(), ValueAccessError>(()) /// ``` -#[derive(Debug, PartialEq)] +#[derive(PartialEq)] #[repr(transparent)] pub struct RawArr { pub(crate) doc: RawDoc, @@ -193,6 +193,14 @@ impl RawArr { } } +impl std::fmt::Debug for RawArr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RawArr") + .field("data", &hex::encode(self.doc.as_bytes())) + .finish() + } +} + impl TryFrom<&RawArr> for Vec { type Error = Error; diff --git a/src/raw/bson.rs b/src/raw/bson.rs index 8a1f65d5..596593c0 100644 --- a/src/raw/bson.rs +++ b/src/raw/bson.rs @@ -259,7 +259,10 @@ impl<'a> TryFrom> for Bson { Bson::Array(items) } RawBson::Binary(rawbson) => { - let RawBinary { subtype, data } = rawbson; + let RawBinary { + subtype, + bytes: data, + } = rawbson; Bson::Binary(crate::Binary { subtype, bytes: data.to_vec(), @@ -300,7 +303,7 @@ impl<'a> TryFrom> for Bson { #[derive(Clone, Copy, Debug, PartialEq)] pub struct RawBinary<'a> { pub(super) subtype: BinarySubtype, - pub(super) data: &'a [u8], + pub(super) bytes: &'a [u8], } impl<'a> RawBinary<'a> { @@ -311,7 +314,7 @@ impl<'a> RawBinary<'a> { /// Gets the contained bytes of the binary value. 
pub fn as_bytes(self) -> &'a [u8] { - self.data + self.bytes } } diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 391366e9..0a2facca 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -57,7 +57,7 @@ use crate::{oid::ObjectId, spec::ElementType, Document}; /// assert_eq!(doc.get_str("hi")?, Some("y'all")); /// # Ok::<(), Error>(()) /// ``` -#[derive(Debug, PartialEq)] +#[derive(PartialEq)] #[repr(transparent)] pub struct RawDoc { data: [u8], @@ -486,6 +486,14 @@ impl RawDoc { } } +impl std::fmt::Debug for RawDoc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RawDoc") + .field("data", &hex::encode(&self.data)) + .finish() + } +} + impl AsRef for RawDoc { fn as_ref(&self) -> &RawDoc { self diff --git a/src/raw/document.rs b/src/raw/document.rs index b36f6078..826e59eb 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -41,7 +41,7 @@ use super::{Error, ErrorKind, Iter, RawBson, RawDoc, Result}; /// assert_eq!(doc.get_str("hi")?, Some("y'all")); /// # Ok::<(), Error>(()) /// ``` -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, PartialEq)] pub struct RawDocument { data: Vec, } @@ -136,6 +136,14 @@ impl RawDocument { } } +impl std::fmt::Debug for RawDocument { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RawDocument") + .field("data", &hex::encode(&self.data)) + .finish() + } +} + impl<'a> From for Cow<'a, RawDoc> { fn from(rd: RawDocument) -> Self { Cow::Owned(rd) diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 7093b5d5..113857a5 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -42,7 +42,7 @@ impl<'a> Iter<'a> { Self { doc, offset: 4, - valid: false, + valid: true, } } @@ -194,7 +194,13 @@ impl<'a> Iterator for Iter<'a> { } _ => &self.doc.as_bytes()[data_start..(data_start + len)], }; - (RawBson::Binary(RawBinary { subtype, data }), 4 + 1 + len) + ( + RawBson::Binary(RawBinary { + subtype, + bytes: data, + }), + 4 + 1 + len, + ) } ElementType::ObjectId => { 
let oid = self.next_oid(valueoffset)?; diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index caecb116..845f9700 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -193,7 +193,7 @@ fn binary() { .as_binary() .expect("result was not a binary object"); assert_eq!(binary.subtype, BinarySubtype::Generic); - assert_eq!(binary.data, &[1, 2, 3]); + assert_eq!(binary.bytes, &[1, 2, 3]); } #[test] From 164de7c9255b055bd55ae2b0e14bd47e1915522c Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 18:12:33 -0400 Subject: [PATCH 38/48] fix tests --- src/raw/array.rs | 9 ++- src/raw/doc.rs | 143 ++++++++++++++++++++++---------------------- src/raw/document.rs | 15 +++-- src/raw/mod.rs | 24 ++++---- src/raw/test/mod.rs | 12 ++-- 5 files changed, 102 insertions(+), 101 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index 5873bb87..2d099401 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -25,13 +25,12 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// original document without making any additional allocations. /// /// ``` -/// # use bson::raw::{Error}; /// use bson::{doc, raw::RawDoc}; /// /// let doc = doc! { /// "x": [1, true, "two", 5.5] /// }; -/// let bytes = bson::to_vec(&doc).unwrap(); +/// let bytes = bson::to_vec(&doc)?; /// /// let rawdoc = RawDoc::new(bytes.as_slice())?; /// let rawarray = rawdoc.get_array("x")?; @@ -39,7 +38,7 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// for v in rawarray { /// println!("{:?}", v?); /// } -/// # Ok::<(), Error>(()) +/// # Ok::<(), Box>(()) /// ``` /// /// Individual elements can be accessed using [`RawArr::get`] or any of @@ -54,13 +53,13 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// let doc = doc! 
{ /// "x": [1, true, "two", 5.5] /// }; -/// let bytes = bson::to_vec(&doc).unwrap(); +/// let bytes = bson::to_vec(&doc)?; /// /// let rawdoc = RawDoc::new(bytes.as_slice())?; /// let rawarray = rawdoc.get_array("x")?; /// /// assert_eq!(rawarray.get_bool(1)?, true); -/// # Ok::<(), ValueAccessError>(()) +/// # Ok::<(), Box>(()) /// ``` #[derive(PartialEq)] #[repr(transparent)] diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 0a2facca..1e9787ba 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -41,7 +41,7 @@ use crate::{oid::ObjectId, spec::ElementType, Document}; /// let mut iter = doc.into_iter(); /// let (key, value) = iter.next().unwrap()?; /// assert_eq!(key, "hi"); -/// assert_eq!(value.as_str(), Ok("y'all")); +/// assert_eq!(value.as_str(), Some("y'all")); /// assert!(iter.next().is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -52,10 +52,11 @@ use crate::{oid::ObjectId, spec::ElementType, Document}; /// requires iterating through the document from the beginning to find the requested key. /// /// ``` -/// # use bson::raw::{RawDocument, Error}; +/// use bson::raw::RawDoc; +/// /// let doc = RawDoc::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; -/// assert_eq!(doc.get_str("hi")?, Some("y'all")); -/// # Ok::<(), Error>(()) +/// assert_eq!(doc.get_str("hi")?, "y'all"); +/// # Ok::<(), Box>(()) /// ``` #[derive(PartialEq)] #[repr(transparent)] @@ -158,7 +159,7 @@ impl RawDoc { /// })?; /// /// let element = doc.get("f64")?.expect("finding key f64"); - /// assert_eq!(element.as_f64(), Ok(2.5)); + /// assert_eq!(element.as_f64(), Some(2.5)); /// assert!(doc.get("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -207,7 +208,7 @@ impl RawDoc { /// /// ``` /// # use bson::raw::Error; - /// use bson::raw::{ErrorKind, RawDocument}; + /// use bson::raw::{ValueAccessErrorKind, RawDocument}; /// use bson::doc; /// /// let doc = RawDocument::from_document(&doc! 
{ @@ -215,10 +216,10 @@ impl RawDoc { /// "f64": 2.5, /// })?; /// - /// assert_eq!(doc.get_f64("f64"), Ok(Some(2.5))); - /// assert!(matches!(doc.get_f64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert_eq!(doc.get_f64("unknown"), Ok(None)); - /// # Ok::<(), Error>(()) + /// assert_eq!(doc.get_f64("f64")?, 2.5); + /// assert!(matches!(doc.get_f64("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_f64("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_f64(&self, key: impl AsRef) -> ValueAccessResult { self.get_with(key, ElementType::Double, RawBson::as_f64) @@ -228,18 +229,17 @@ impl RawDoc { /// key corresponds to a value which isn't a string. /// /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::{RawDocument, ErrorKind}}; + /// use bson::{doc, raw::{RawDocument, ValueAccessErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! { /// "string": "hello", /// "bool": true, /// })?; /// - /// assert_eq!(doc.get_str("string"), Ok(Some("hello"))); - /// assert!(matches!(doc.get_str("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert_eq!(doc.get_str("unknown"), Ok(None)); - /// # Ok::<(), Error>(()) + /// assert_eq!(doc.get_str("string")?, "hello"); + /// assert!(matches!(doc.get_str("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_str("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_str<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a str> { self.get_with(key, ElementType::String, RawBson::as_str) @@ -250,17 +250,17 @@ impl RawDoc { /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, raw::{ErrorKind, RawDocument}}; + /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocument}}; /// /// let doc = RawDocument::from_document(&doc! 
{ /// "doc": { "key": "value"}, /// "bool": true, /// })?; /// - /// assert_eq!(doc.get_document("doc")?.expect("finding key doc").get_str("key"), Ok(Some("value"))); - /// assert!(matches!(doc.get_document("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert!(doc.get_document("unknown")?.is_none()); - /// # Ok::<(), Error>(()) + /// assert_eq!(doc.get_document("doc")?.get_str("key")?, "value"); + /// assert!(matches!(doc.get_document("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_document("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_document<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a RawDoc> { self.get_with(key, ElementType::EmbeddedDocument, RawBson::as_document) @@ -270,22 +270,21 @@ impl RawDoc { /// the key corresponds to a value which isn't an array. /// /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::RawDocument}; + /// use bson::{doc, raw::{RawDocument, ValueAccessErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! 
{ /// "array": [true, 3], /// "bool": true, /// })?; /// - /// let mut arr_iter = doc.get_array("array")?.expect("finding key array").into_iter(); - /// let _: bool = arr_iter.next().unwrap()?.as_bool()?; - /// let _: i32 = arr_iter.next().unwrap()?.as_i32()?; + /// let mut arr_iter = doc.get_array("array")?.into_iter(); + /// let _: bool = arr_iter.next().unwrap()?.as_bool().unwrap(); + /// let _: i32 = arr_iter.next().unwrap()?.as_i32().unwrap(); /// /// assert!(arr_iter.next().is_none()); /// assert!(doc.get_array("bool").is_err()); - /// assert!(doc.get_array("unknown")?.is_none()); - /// # Ok::<(), Error>(()) + /// assert!(matches!(doc.get_array("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_array<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a RawArr> { self.get_with(key, ElementType::Array, RawBson::as_array) @@ -295,10 +294,9 @@ impl RawDoc { /// if the key corresponds to a value which isn't a binary value. /// /// ``` - /// # use bson::raw::Error; /// use bson::{ /// doc, - /// raw::{ErrorKind, RawDocument, RawBinary}, + /// raw::{ValueAccessErrorKind, RawDocument, RawBinary}, /// spec::BinarySubtype, /// Binary, /// }; @@ -308,10 +306,10 @@ impl RawDoc { /// "bool": true, /// })?; /// - /// assert_eq!(doc.get_binary("binary")?.map(RawBinary::as_bytes), Some(&[1, 2, 3][..])); - /// assert!(matches!(doc.get_binary("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert!(doc.get_binary("unknown")?.is_none()); - /// # Ok::<(), Error>(()) + /// assert_eq!(doc.get_binary("binary")?.as_bytes(), &[1, 2, 3][..]); + /// assert!(matches!(doc.get_binary("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); + /// assert!(matches!(doc.get_binary("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_binary<'a>(&'a self, key: impl AsRef) -> ValueAccessResult> { self.get_with(key, ElementType::Binary, RawBson::as_binary) @@ -322,17 +320,17 @@ impl RawDoc { /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, oid::ObjectId, raw::{ErrorKind, RawDocument}}; + /// use bson::{doc, oid::ObjectId, raw::{ValueAccessErrorKind, RawDocument}}; /// /// let doc = RawDocument::from_document(&doc! { /// "_id": ObjectId::new(), /// "bool": true, /// })?; /// - /// let oid = doc.get_object_id("_id")?.unwrap(); - /// assert!(matches!(doc.get_object_id("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert!(doc.get_object_id("unknown")?.is_none()); - /// # Ok::<(), Error>(()) + /// let oid = doc.get_object_id("_id")?; + /// assert!(matches!(doc.get_object_id("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_object_id("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_object_id(&self, key: impl AsRef) -> ValueAccessResult { self.get_with(key, ElementType::ObjectId, RawBson::as_object_id) @@ -343,17 +341,17 @@ impl RawDoc { /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, oid::ObjectId, raw::{RawDocument, ErrorKind}}; + /// use bson::{doc, oid::ObjectId, raw::{RawDocument, ValueAccessErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! { /// "_id": ObjectId::new(), /// "bool": true, /// })?; /// - /// assert!(doc.get_bool("bool")?.unwrap()); - /// assert!(matches!(doc.get_bool("_id").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert!(doc.get_object_id("unknown")?.is_none()); - /// # Ok::<(), Error>(()) + /// assert!(doc.get_bool("bool")?); + /// assert!(matches!(doc.get_bool("_id").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); + /// assert!(matches!(doc.get_bool("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_bool(&self, key: impl AsRef) -> ValueAccessResult { self.get_with(key, ElementType::Boolean, RawBson::as_bool) @@ -364,7 +362,7 @@ impl RawDoc { /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, raw::{ErrorKind, RawDocument}, DateTime}; + /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocument}, DateTime}; /// /// let dt = DateTime::now(); /// let doc = RawDocument::from_document(&doc! { @@ -372,10 +370,10 @@ impl RawDoc { /// "bool": true, /// })?; /// - /// assert_eq!(doc.get_datetime("created_at")?, Some(dt)); - /// assert!(matches!(doc.get_datetime("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert!(doc.get_datetime("unknown")?.is_none()); - /// # Ok::<(), Error>(()) + /// assert_eq!(doc.get_datetime("created_at")?, dt); + /// assert!(matches!(doc.get_datetime("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_datetime("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_datetime(&self, key: impl AsRef) -> ValueAccessResult { self.get_with(key, ElementType::DateTime, RawBson::as_datetime) @@ -385,8 +383,7 @@ impl RawDoc { /// the key corresponds to a value which isn't a regex. /// /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, Regex, raw::{RawDocument, ErrorKind}}; + /// use bson::{doc, Regex, raw::{RawDocument, ValueAccessErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! { /// "regex": Regex { @@ -396,11 +393,11 @@ impl RawDoc { /// "bool": true, /// })?; /// - /// assert_eq!(doc.get_regex("regex")?.unwrap().pattern(), r"end\s*$"); - /// assert_eq!(doc.get_regex("regex")?.unwrap().options(), "i"); - /// assert!(matches!(doc.get_regex("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. 
})); - /// assert!(doc.get_regex("unknown")?.is_none()); - /// # Ok::<(), Error>(()) + /// assert_eq!(doc.get_regex("regex")?.pattern(), r"end\s*$"); + /// assert_eq!(doc.get_regex("regex")?.options(), "i"); + /// assert!(matches!(doc.get_regex("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_regex("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_regex<'a>(&'a self, key: impl AsRef) -> ValueAccessResult> { self.get_with(key, ElementType::RegularExpression, RawBson::as_regex) @@ -411,20 +408,20 @@ impl RawDoc { /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, Timestamp, raw::{RawDocument, ErrorKind}}; + /// use bson::{doc, Timestamp, raw::{RawDocument, ValueAccessErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! { /// "bool": true, /// "ts": Timestamp { time: 649876543, increment: 9 }, /// })?; /// - /// let timestamp = doc.get_timestamp("ts")?.unwrap(); + /// let timestamp = doc.get_timestamp("ts")?; /// - /// assert_eq!(timestamp.time(), 649876543); - /// assert_eq!(timestamp.increment(), 9); - /// assert!(matches!(doc.get_timestamp("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert_eq!(doc.get_timestamp("unknown"), Ok(None)); - /// # Ok::<(), Error>(()) + /// assert_eq!(timestamp.time, 649876543); + /// assert_eq!(timestamp.increment, 9); + /// assert!(matches!(doc.get_timestamp("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); + /// assert!(matches!(doc.get_timestamp("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_timestamp(&self, key: impl AsRef) -> ValueAccessResult { self.get_with(key, ElementType::Timestamp, RawBson::as_timestamp) @@ -435,17 +432,17 @@ impl RawDoc { /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, raw::{RawDocument, ErrorKind}}; + /// use bson::{doc, raw::{RawDocument, ValueAccessErrorKind}}; /// /// let doc = RawDocument::from_document(&doc! { /// "bool": true, /// "i32": 1_000_000, /// })?; /// - /// assert_eq!(doc.get_i32("i32"), Ok(Some(1_000_000))); - /// assert!(matches!(doc.get_i32("bool").unwrap_err().kind, ErrorKind::UnexpectedType { ..})); - /// assert_eq!(doc.get_i32("unknown"), Ok(None)); - /// # Ok::<(), Error>(()) + /// assert_eq!(doc.get_i32("i32")?, 1_000_000); + /// assert!(matches!(doc.get_i32("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { ..})); + /// assert!(matches!(doc.get_i32("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_i32(&self, key: impl AsRef) -> ValueAccessResult { self.get_with(key, ElementType::Int32, RawBson::as_i32) @@ -456,17 +453,17 @@ impl RawDoc { /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, raw::{ErrorKind, RawDocument}}; + /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocument}}; /// /// let doc = RawDocument::from_document(&doc! { /// "bool": true, /// "i64": 9223372036854775807_i64, /// })?; /// - /// assert_eq!(doc.get_i64("i64"), Ok(Some(9223372036854775807))); - /// assert!(matches!(doc.get_i64("bool").unwrap_err().kind, ErrorKind::UnexpectedType { .. })); - /// assert_eq!(doc.get_i64("unknown"), Ok(None)); - /// # Ok::<(), Error>(()) + /// assert_eq!(doc.get_i64("i64")?, 9223372036854775807); + /// assert!(matches!(doc.get_i64("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); + /// assert!(matches!(doc.get_i64("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) /// ``` pub fn get_i64(&self, key: impl AsRef) -> ValueAccessResult { self.get_with(key, ElementType::Int64, RawBson::as_i64) diff --git a/src/raw/document.rs b/src/raw/document.rs index 826e59eb..d113e411 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -19,12 +19,14 @@ use super::{Error, ErrorKind, Iter, RawBson, RawDoc, Result}; /// the original document without making any additional allocations. /// /// ``` -/// # use bson::raw::{RawDocument, Error}; +/// # use bson::raw::Error; +/// use bson::raw::RawDocument; +/// /// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; /// let mut iter = doc.iter(); /// let (key, value) = iter.next().unwrap()?; /// assert_eq!(key, "hi"); -/// assert_eq!(value.as_str(), Ok("y'all")); +/// assert_eq!(value.as_str(), Some("y'all")); /// assert!(iter.next().is_none()); /// # Ok::<(), Error>(()) /// ``` @@ -36,10 +38,11 @@ use super::{Error, ErrorKind, Iter, RawBson, RawDoc, Result}; /// beginning to find the requested key. /// /// ``` -/// # use bson::raw::{RawDocument, Error}; +/// use bson::raw::RawDocument; +/// /// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -/// assert_eq!(doc.get_str("hi")?, Some("y'all")); -/// # Ok::<(), Error>(()) +/// assert_eq!(doc.get_str("hi")?, "y'all"); +/// # Ok::<(), Box>(()) /// ``` #[derive(Clone, PartialEq)] pub struct RawDocument { @@ -107,7 +110,7 @@ impl RawDocument { /// for element in doc.iter() { /// let (key, value) = element?; /// assert_eq!(key, "ferris"); - /// assert_eq!(value.as_bool()?, true); + /// assert_eq!(value.as_bool(), Some(true)); /// } /// # Ok::<(), Error>(()) /// ``` diff --git a/src/raw/mod.rs b/src/raw/mod.rs index ace0f4b4..209a1dd1 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -24,8 +24,8 @@ //! 
let elem = doc.get("hi")?.unwrap(); //! //! assert_eq!( -//! elem.as_str()?, -//! "y'all", +//! elem.as_str(), +//! Some("y'all"), //! ); //! # Ok::<(), bson::raw::Error>(()) //! ``` @@ -49,17 +49,15 @@ //! }; //! //! let raw = RawDocument::from_document(&document)?; -//! let value: Option<&str> = raw +//! let value = raw //! .get_document("goodbye")? -//! .map(|doc| doc.get_str("cruel")) -//! .transpose()? -//! .flatten(); +//! .get_str("cruel")?; //! //! assert_eq!( //! value, -//! Some("world"), +//! "world", //! ); -//! # Ok::<(), bson::raw::Error>(()) +//! # Ok::<(), Box>(()) //! ``` //! //! ### Reference types ([`RawDoc`]) @@ -75,8 +73,8 @@ //! use bson::raw::RawDoc; //! //! let bytes = b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00"; -//! assert_eq!(RawDoc::new(bytes)?.get_str("hi")?, Some("y'all")); -//! # Ok::<(), bson::raw::Error>(()) +//! assert_eq!(RawDoc::new(bytes)?.get_str("hi")?, "y'all"); +//! # Ok::<(), Box>(()) //! ``` //! //! ### Iteration @@ -103,11 +101,11 @@ //! //! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?; //! assert_eq!(key, "crate"); -//! assert_eq!(value.as_str()?, "bson"); +//! assert_eq!(value.as_str(), Some("bson")); //! //! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?; //! assert_eq!(key, "year"); -//! assert_eq!(value.as_str()?, "2021"); +//! assert_eq!(value.as_str(), Some("2021")); //! # Ok::<(), bson::raw::Error>(()) //! 
``` @@ -129,7 +127,7 @@ pub use self::{ bson::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex}, doc::RawDoc, document::RawDocument, - error::{Error, ErrorKind, Result}, + error::{Error, ErrorKind, Result, ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, iter::Iter, }; diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index 845f9700..97511c1b 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -4,6 +4,7 @@ use super::*; use crate::{ doc, oid::ObjectId, + raw::error::ValueAccessErrorKind, spec::BinarySubtype, Binary, Bson, @@ -175,9 +176,12 @@ fn array() { .expect("no key array") .as_array() .expect("result was not an array"); - assert_eq!(array.get_str(0), Ok(Some("binary"))); - assert_eq!(array.get_str(3), Ok(Some("notation"))); - assert_eq!(array.get_str(4), Ok(None)); + assert_eq!(array.get_str(0), Ok("binary")); + assert_eq!(array.get_str(3), Ok("notation")); + assert_eq!( + array.get_str(4).unwrap_err().kind, + ValueAccessErrorKind::NotPresent + ); } #[test] @@ -186,7 +190,7 @@ fn binary() { "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] } }) .unwrap(); - let binary: elem::RawBinary<'_> = rawdoc + let binary: bson::RawBinary<'_> = rawdoc .get("binary") .expect("error finding key binary") .expect("no key binary") From c06485775c5b0b24d17ae7244957a3de13aca6a4 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 18:18:29 -0400 Subject: [PATCH 39/48] fix clippy --- src/de/raw.rs | 2 +- src/raw/array.rs | 2 +- src/raw/doc.rs | 14 +++++++------- src/raw/test/mod.rs | 12 ++++++------ src/tests/spec/corpus.rs | 19 ++++++++++--------- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/de/raw.rs b/src/de/raw.rs index 9fafda5c..c48ba3c7 100644 --- a/src/de/raw.rs +++ b/src/de/raw.rs @@ -625,7 +625,7 @@ impl<'de> serde::de::MapAccess<'de> for Decimal128Access { where V: serde::de::DeserializeSeed<'de>, { - seed.deserialize(Decimal128Deserializer(self.decimal.clone())) + 
seed.deserialize(Decimal128Deserializer(self.decimal)) } } diff --git a/src/raw/array.rs b/src/raw/array.rs index 2d099401..b86e1ef6 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -159,7 +159,7 @@ impl RawArr { /// Gets the DateTime at the given index or returns an error if the value at that index isn't a /// DateTime. pub fn get_datetime(&self, index: usize) -> ValueAccessResult { - Ok(self.get_with(index, ElementType::DateTime, RawBson::as_datetime)?) + self.get_with(index, ElementType::DateTime, RawBson::as_datetime) } /// Gets a reference to the BSON regex at the given index or returns an error if the diff --git a/src/raw/doc.rs b/src/raw/doc.rs index 1e9787ba..c38657bf 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -95,7 +95,7 @@ impl RawDoc { }); } - let length = i32_from_slice(&data)?; + let length = i32_from_slice(data)?; if data.len() as i32 != length { return Err(Error { @@ -163,7 +163,7 @@ impl RawDoc { /// assert!(doc.get("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn get<'a>(&'a self, key: impl AsRef) -> Result>> { + pub fn get(&self, key: impl AsRef) -> Result>> { for result in self.into_iter() { let (k, v) = result?; if key.as_ref() == k { @@ -241,7 +241,7 @@ impl RawDoc { /// assert!(matches!(doc.get_str("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); /// # Ok::<(), Box>(()) /// ``` - pub fn get_str<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a str> { + pub fn get_str(&self, key: impl AsRef) -> ValueAccessResult<&'_ str> { self.get_with(key, ElementType::String, RawBson::as_str) } @@ -262,7 +262,7 @@ impl RawDoc { /// assert!(matches!(doc.get_document("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); /// # Ok::<(), Box>(()) /// ``` - pub fn get_document<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a RawDoc> { + pub fn get_document(&self, key: impl AsRef) -> ValueAccessResult<&'_ RawDoc> { self.get_with(key, ElementType::EmbeddedDocument, RawBson::as_document) } @@ 
-286,7 +286,7 @@ impl RawDoc { /// assert!(matches!(doc.get_array("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); /// # Ok::<(), Box>(()) /// ``` - pub fn get_array<'a>(&'a self, key: impl AsRef) -> ValueAccessResult<&'a RawArr> { + pub fn get_array(&self, key: impl AsRef) -> ValueAccessResult<&'_ RawArr> { self.get_with(key, ElementType::Array, RawBson::as_array) } @@ -311,7 +311,7 @@ impl RawDoc { /// assert!(matches!(doc.get_binary("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); /// # Ok::<(), Box>(()) /// ``` - pub fn get_binary<'a>(&'a self, key: impl AsRef) -> ValueAccessResult> { + pub fn get_binary(&self, key: impl AsRef) -> ValueAccessResult> { self.get_with(key, ElementType::Binary, RawBson::as_binary) } @@ -399,7 +399,7 @@ impl RawDoc { /// assert!(matches!(doc.get_regex("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); /// # Ok::<(), Box>(()) /// ``` - pub fn get_regex<'a>(&'a self, key: impl AsRef) -> ValueAccessResult> { + pub fn get_regex(&self, key: impl AsRef) -> ValueAccessResult> { self.get_with(key, ElementType::RegularExpression, RawBson::as_regex) } diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index 97511c1b..7cd4b8b5 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -40,7 +40,7 @@ fn nested_document() { let docbytes = to_bytes(&doc! 
{ "outer": { "inner": "surprise", - "double": 5.5, + "double": 6, }, }); let rawdoc = RawDoc::new(&docbytes).unwrap(); @@ -63,11 +63,11 @@ fn nested_document() { assert_eq!( subdoc .get("double") - .expect("get double result") - .expect("get double option") - .as_f64() - .expect("as f64 result"), - 5.5 + .expect("get i64 result") + .expect("get i64 option") + .as_i64() + .expect("as i64 result"), + 6 ); } diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index d2e2158f..b2c45dac 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -396,6 +396,16 @@ fn run_test(test: TestFile) { } for decode_error in test.decode_errors.iter() { + let description = format!( + "{} decode error: {}", + test.bson_type, decode_error.description + ); + let bson = hex::decode(&decode_error.bson).expect("should decode from hex"); + + if let Ok(doc) = RawDoc::new(bson.as_slice()) { + Document::try_from(doc).expect_err(description.as_str()); + } + // No meaningful definition of "byte count" for an arbitrary reader. 
if decode_error.description == "Stated length less than byte count, with garbage after envelope" @@ -403,18 +413,9 @@ fn run_test(test: TestFile) { continue; } - let description = format!( - "{} decode error: {}", - test.bson_type, decode_error.description - ); - let bson = hex::decode(&decode_error.bson).expect("should decode from hex"); Document::from_reader(bson.as_slice()).expect_err(&description); crate::from_reader::<_, Document>(bson.as_slice()).expect_err(description.as_str()); - if let Ok(doc) = RawDoc::new(bson.as_slice()) { - Document::try_from(doc).expect_err(&description.as_str()); - } - if decode_error.description.contains("invalid UTF-8") { crate::from_reader_utf8_lossy::<_, Document>(bson.as_slice()).unwrap_or_else(|err| { panic!( From 454c05a41725731360b44c911eba31a3203e8ee1 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 18:27:36 -0400 Subject: [PATCH 40/48] fix rustdoc --- src/raw/array.rs | 2 +- src/raw/bson.rs | 20 ++++++++++---------- src/raw/doc.rs | 2 +- src/raw/document.rs | 4 ++-- src/raw/error.rs | 4 ++-- src/raw/mod.rs | 21 +++++++++++---------- 6 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index b86e1ef6..2199dfd1 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -17,7 +17,7 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// /// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`. /// -/// Accessing elements within a [`RawArr`] is similar to element access in [`bson::Document`], +/// Accessing elements within a [`RawArr`] is similar to element access in [`crate::Document`], /// but because the contents are parsed during iteration instead of at creation time, format errors /// can happen at any time during use. 
/// diff --git a/src/raw/bson.rs b/src/raw/bson.rs index 596593c0..c2377e14 100644 --- a/src/raw/bson.rs +++ b/src/raw/bson.rs @@ -85,7 +85,7 @@ impl<'a> RawBson<'a> { } } - /// Gets the f64 that's referenced or returns `None` if the referenced value isn't a BSON + /// Gets the `f64` that's referenced or returns `None` if the referenced value isn't a BSON /// double. pub fn as_f64(self) -> Option { match self { @@ -103,8 +103,8 @@ impl<'a> RawBson<'a> { } } - /// Gets the [`&RawArr`] that's referenced or returns `None` if the referenced value isn't a - /// BSON array. + /// Gets the [`crate::raw::RawArr`] that's referenced or returns `None` if the referenced value + /// isn't a BSON array. pub fn as_array(self) -> Option<&'a RawArr> { match self { RawBson::Array(v) => Some(v), @@ -112,7 +112,7 @@ impl<'a> RawBson<'a> { } } - /// Gets the [`&RawDoc`] that's referenced or returns `None` if the referenced value + /// Gets the [`crate::raw::RawDoc`] that's referenced or returns `None` if the referenced value /// isn't a BSON document. pub fn as_document(self) -> Option<&'a RawDoc> { match self { @@ -148,8 +148,8 @@ impl<'a> RawBson<'a> { } } - /// Gets the [`ObjectId`] that's referenced or returns `None` if the referenced value isn't a - /// BSON ObjectID. + /// Gets the [`crate::oid::ObjectId`] that's referenced or returns `None` if the referenced + /// value isn't a BSON ObjectID. pub fn as_object_id(self) -> Option { match self { RawBson::ObjectId(v) => Some(v), @@ -175,8 +175,8 @@ impl<'a> RawBson<'a> { } } - /// Gets the [`DateTime`] that's referenced or returns `None` if the referenced value isn't a - /// BSON datetime. + /// Gets the [`crate::DateTime`] that's referenced or returns `None` if the referenced value + /// isn't a BSON datetime. 
pub fn as_datetime(self) -> Option { match self { RawBson::DateTime(v) => Some(v), @@ -193,8 +193,8 @@ impl<'a> RawBson<'a> { } } - /// Gets the [`Timestamp`] that's referenced or returns `None` if the referenced value isn't a - /// BSON timestamp. + /// Gets the [`crate::Timestamp`] that's referenced or returns `None` if the referenced value + /// isn't a BSON timestamp. pub fn as_timestamp(self) -> Option { match self { RawBson::Timestamp(timestamp) => Some(timestamp), diff --git a/src/raw/doc.rs b/src/raw/doc.rs index c38657bf..d42b5d02 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -26,7 +26,7 @@ use crate::{oid::ObjectId, spec::ElementType, Document}; /// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`. For an /// owned version of this type, see [`RawDocument`]. /// -/// Accessing elements within a [`RawDoc`] is similar to element access in [`bson::Document`], +/// Accessing elements within a [`RawDoc`] is similar to element access in [`crate::Document`], /// but because the contents are parsed during iteration instead of at creation time, format errors /// can happen at any time during use. /// diff --git a/src/raw/document.rs b/src/raw/document.rs index d113e411..71948664 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -9,9 +9,9 @@ use crate::Document; use super::{Error, ErrorKind, Iter, RawBson, RawDoc, Result}; /// A BSON document, stored as raw bytes on the heap. This can be created from a `Vec` or -/// a [`bson::Document`]. +/// a [`crate::Document`]. /// -/// Accessing elements within a [`RawDocument`] is similar to element access in [`bson::Document`], +/// Accessing elements within a [`RawDocument`] is similar to element access in [`crate::Document`], /// but because the contents are parsed during iteration instead of at creation time, format errors /// can happen at any time during use. 
/// diff --git a/src/raw/error.rs b/src/raw/error.rs index d6516380..a35ee28b 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -74,7 +74,7 @@ pub type Result = std::result::Result; pub type ValueAccessResult = std::result::Result; /// Error to indicate that either a value was empty or it contained an unexpected -/// type, for use with the direct getters (e.g. [`RawDoc::get_str`]). +/// type, for use with the direct getters (e.g. [`crate::raw::RawDoc::get_str`]). #[derive(Debug, PartialEq, Clone)] #[non_exhaustive] pub struct ValueAccessError { @@ -85,7 +85,7 @@ pub struct ValueAccessError { pub key: String, } -/// The type of error encountered when using a direct getter (e.g. [`RawDoc::get_str`]). +/// The type of error encountered when using a direct getter (e.g. [`crate::raw::RawDoc::get_str`]). #[derive(Debug, PartialEq, Clone)] #[non_exhaustive] pub enum ValueAccessErrorKind { diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 209a1dd1..18b1231d 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1,16 +1,17 @@ //! An API for interacting with raw BSON bytes. //! -//! This module provides two document types, [`RawDocument`] and [`RawDoc`] (akin to [`std::String`] -//! and [`std::str`], for working with raw BSON documents. These types differ from the regular -//! [`Document`] type in that their storage is BSON bytes rather than a hash-map like Rust type. In -//! certain circumstances, these types can be leveraged for increased performance. +//! This module provides two document types, [`RawDocument`] and [`RawDoc`] (akin to +//! [`std::string::String`] and [`str`]), for working with raw BSON documents. These types differ +//! from the regular [`crate::Document`] type in that their storage is BSON bytes rather than a +//! hash-map like Rust type. In certain circumstances, these types can be leveraged for increased +//! performance. //! //! This module also provides a [`RawBson`] type for modeling any borrowed BSON element and a //! 
[`RawArr`] type for modeling a borrowed slice of a document containing a BSON array element. //! //! A [`RawDocument`] can be created from a `Vec` containing raw BSON data, and elements -//! accessed via methods similar to those available on the [`Document`] type. Note that -//! [`RawDocument::get`] returns a [`raw::Result>`], since the bytes contained in +//! accessed via methods similar to those available on the [`crate::Document`] type. Note that +//! [`RawDoc::get`] returns a [`Result>`], since the bytes contained in //! the document are not fully validated until trying to access the contained data. //! //! ```rust @@ -30,10 +31,10 @@ //! # Ok::<(), bson::raw::Error>(()) //! ``` //! -//! ### [`Document`] interop +//! ### [`crate::Document`] interop //! -//! A [`RawDocument`] can be created from a [`Document`]. Internally, this -//! serializes the [`Document`] to a `Vec`, and then includes those bytes in the +//! A [`RawDocument`] can be created from a [`crate::Document`]. Internally, this +//! serializes the [`crate::Document`] to a `Vec`, and then includes those bytes in the //! [`RawDocument`]. //! //! ```rust @@ -124,7 +125,7 @@ use crate::de::MIN_BSON_STRING_SIZE; pub use self::{ array::{RawArr, RawArrIter}, - bson::{RawBinary, RawBson, RawJavaScriptCodeWithScope, RawRegex}, + bson::{RawBinary, RawBson, RawDbPointer, RawJavaScriptCodeWithScope, RawRegex}, doc::RawDoc, document::RawDocument, error::{Error, ErrorKind, Result, ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, From a06068ef6320213a590304f689e5296694bbfee8 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 18:29:48 -0400 Subject: [PATCH 41/48] fix test --- src/raw/test/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index 7cd4b8b5..65c0f17a 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -40,7 +40,7 @@ fn nested_document() { let docbytes = to_bytes(&doc! 
{ "outer": { "inner": "surprise", - "double": 6, + "i64": 6_i64, }, }); let rawdoc = RawDoc::new(&docbytes).unwrap(); @@ -62,7 +62,7 @@ fn nested_document() { assert_eq!( subdoc - .get("double") + .get("i64") .expect("get i64 result") .expect("get i64 option") .as_i64() From 23792981dbc7755bcbaef239cd02fe1abd0979a3 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 13 Oct 2021 19:20:57 -0400 Subject: [PATCH 42/48] minor cleanup --- src/raw/array.rs | 2 +- src/raw/doc.rs | 1 - src/raw/error.rs | 30 ++++++++++++++++------- src/raw/mod.rs | 59 ++++++++++++++++++--------------------------- src/raw/test/mod.rs | 1 - 5 files changed, 45 insertions(+), 48 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index 2199dfd1..f6fd24ac 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -21,7 +21,7 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// but because the contents are parsed during iteration instead of at creation time, format errors /// can happen at any time during use. /// -/// Iterating over a [`RawArr`] yields either an error or a key-value pair that borrows from the +/// Iterating over a [`RawArr`] yields either an error or a value that borrows from the /// original document without making any additional allocations. /// /// ``` diff --git a/src/raw/doc.rs b/src/raw/doc.rs index d42b5d02..b810af0b 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -32,7 +32,6 @@ use crate::{oid::ObjectId, spec::ElementType, Document}; /// /// Iterating over a [`RawDoc`] yields either an error or a key-value pair that borrows from the /// original document without making any additional allocations. - /// ``` /// # use bson::raw::{Error}; /// use bson::raw::RawDoc; diff --git a/src/raw/error.rs b/src/raw/error.rs index a35ee28b..5022376a 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -10,7 +10,7 @@ pub struct Error { pub kind: ErrorKind, /// They key associated with the error, if any. 
- pub key: Option, + pub(crate) key: Option, } impl Error { @@ -29,12 +29,11 @@ impl Error { self.key = Some(key.as_ref().to_string()); self } -} -/// Execute the provided closure, mapping the key of the returned error (if any) to the provided -/// key. -pub(crate) fn try_with_key Result>(key: impl AsRef, f: F) -> Result { - f().map_err(|e| e.with_key(key)) + /// The key at which the error was encountered, if any. + pub fn key(&self) -> Option<&str> { + self.key.as_deref() + } } /// The different categories of errors that can be returned when reading from raw BSON. @@ -45,8 +44,7 @@ pub enum ErrorKind { #[non_exhaustive] MalformedValue { message: String }, - /// Improper UTF-8 bytes were found when proper UTF-8 was expected. The error value contains - /// the malformed data as bytes. + /// Improper UTF-8 bytes were found when proper UTF-8 was expected. Utf8EncodingError(Utf8Error), } @@ -71,6 +69,13 @@ impl std::fmt::Display for Error { impl std::error::Error for Error {} pub type Result = std::result::Result; + +/// Execute the provided closure, mapping the key of the returned error (if any) to the provided +/// key. +pub(crate) fn try_with_key Result>(key: impl AsRef, f: F) -> Result { + f().map_err(|e| e.with_key(key)) +} + pub type ValueAccessResult = std::result::Result; /// Error to indicate that either a value was empty or it contained an unexpected @@ -82,7 +87,14 @@ pub struct ValueAccessError { pub kind: ValueAccessErrorKind, /// The key at which the error was encountered. - pub key: String, + pub(crate) key: String, +} + +impl ValueAccessError { + /// The key at which the error was encountered. + pub fn key(&self) -> &str { + self.key.as_str() + } } /// The type of error encountered when using a direct getter (e.g. [`crate::raw::RawDoc::get_str`]). 
diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 18b1231d..bbc8bb99 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -138,11 +138,10 @@ fn f64_from_slice(val: &[u8]) -> Result { let arr = val .get(0..8) .and_then(|s| s.try_into().ok()) - .ok_or_else(|| Error { - key: None, - kind: ErrorKind::MalformedValue { + .ok_or_else(|| { + Error::new_without_key(ErrorKind::MalformedValue { message: format!("expected 8 bytes to read double, instead got {}", val.len()), - }, + }) })?; Ok(f64::from_le_bytes(arr)) } @@ -153,11 +152,10 @@ fn i32_from_slice(val: &[u8]) -> Result { let arr = val .get(0..4) .and_then(|s| s.try_into().ok()) - .ok_or_else(|| Error { - key: None, - kind: ErrorKind::MalformedValue { + .ok_or_else(|| { + Error::new_without_key(ErrorKind::MalformedValue { message: format!("expected 4 bytes to read i32, instead got {}", val.len()), - }, + }) })?; Ok(i32::from_le_bytes(arr)) } @@ -168,32 +166,27 @@ fn i64_from_slice(val: &[u8]) -> Result { let arr = val .get(0..8) .and_then(|s| s.try_into().ok()) - .ok_or_else(|| Error { - key: None, - kind: ErrorKind::MalformedValue { + .ok_or_else(|| { + Error::new_without_key(ErrorKind::MalformedValue { message: format!("expected 8 bytes to read i64, instead got {}", val.len()), - }, + }) })?; Ok(i64::from_le_bytes(arr)) } fn read_nullterminated(buf: &[u8]) -> Result<&str> { let mut splits = buf.splitn(2, |x| *x == 0); - let value = splits.next().ok_or_else(|| Error { - key: None, - kind: ErrorKind::MalformedValue { + let value = splits.next().ok_or_else(|| { + Error::new_without_key(ErrorKind::MalformedValue { message: "no value".into(), - }, + }) })?; if splits.next().is_some() { Ok(try_to_str(value)?) 
} else { - Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "expected null terminator".into(), - }, - }) + Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "expected null terminator".into(), + })) } } @@ -211,16 +204,13 @@ fn read_lenencoded(buf: &[u8]) -> Result<&str> { } if buf.len() < end { - return Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: format!( - "expected buffer to contain at least {} bytes, but it only has {}", - end, - buf.len() - ), - }, - }); + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: format!( + "expected buffer to contain at least {} bytes, but it only has {}", + end, + buf.len() + ), + })); } if buf[end - 1] != 0 { @@ -236,10 +226,7 @@ fn read_lenencoded(buf: &[u8]) -> Result<&str> { fn try_to_str(data: &[u8]) -> Result<&str> { match std::str::from_utf8(data) { Ok(s) => Ok(s), - Err(e) => Err(Error { - key: None, - kind: ErrorKind::Utf8EncodingError(e), - }), + Err(e) => Err(Error::new_without_key(ErrorKind::Utf8EncodingError(e))), } } diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index 65c0f17a..b44cedd4 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -484,7 +484,6 @@ proptest! 
{ #[test] fn roundtrip_bson(bson in arbitrary_bson()) { - println!("{:?}", bson); let doc = doc!{"bson": bson}; let raw = to_bytes(&doc); let raw = RawDocument::new(raw); From 2220a3fa9ceae87de5b2c5437a4322a6e74f3ae0 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 14 Oct 2021 13:20:04 -0400 Subject: [PATCH 43/48] test roundtrip --- src/raw/test/mod.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index b44cedd4..8bbc97d6 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -116,7 +116,14 @@ fn rawdoc_to_doc() { }); let rawdoc = RawDoc::new(&docbytes).expect("invalid document"); - let _doc: crate::Document = rawdoc.try_into().expect("invalid bson"); + let doc: crate::Document = rawdoc.try_into().expect("invalid bson"); + let round_tripped_bytes = crate::to_vec(&doc).expect("serialize should work"); + assert_eq!(round_tripped_bytes, docbytes); + + let mut vec_writer_bytes = vec![]; + doc.to_writer(&mut vec_writer_bytes) + .expect("to writer should work"); + assert_eq!(vec_writer_bytes, docbytes); } #[test] From bd2daee94206aa8b1c6a8ffb5d8216a786d78816 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 14 Oct 2021 13:21:26 -0400 Subject: [PATCH 44/48] use map_err --- src/raw/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index bbc8bb99..b93c7d1d 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -224,10 +224,7 @@ fn read_lenencoded(buf: &[u8]) -> Result<&str> { } fn try_to_str(data: &[u8]) -> Result<&str> { - match std::str::from_utf8(data) { - Ok(s) => Ok(s), - Err(e) => Err(Error::new_without_key(ErrorKind::Utf8EncodingError(e))), - } + std::str::from_utf8(data).map_err(|e| Error::new_without_key(ErrorKind::Utf8EncodingError(e))) } fn usize_try_from_i32(i: i32) -> Result { From 617235d7a1f16555f089990ab38587c2f59bb93f Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 14 Oct 2021 13:23:22 -0400 Subject: 
[PATCH 45/48] use impl Intoi in error constructor --- src/raw/error.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/raw/error.rs b/src/raw/error.rs index 5022376a..c6f3fefe 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -14,10 +14,10 @@ pub struct Error { } impl Error { - pub(crate) fn new_with_key(key: impl AsRef, kind: ErrorKind) -> Self { + pub(crate) fn new_with_key(key: impl Into, kind: ErrorKind) -> Self { Self { kind, - key: Some(key.as_ref().to_string()), + key: Some(key.into()), } } From 82099e5872eea966019ff19f80d6c52b7ea7b81f Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 18 Oct 2021 13:10:27 -0400 Subject: [PATCH 46/48] fix up docstrings --- src/raw/doc.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/raw/doc.rs b/src/raw/doc.rs index b810af0b..7a7c594c 100644 --- a/src/raw/doc.rs +++ b/src/raw/doc.rs @@ -64,7 +64,7 @@ pub struct RawDoc { } impl RawDoc { - /// Constructs a new RawDoc, validating _only_ the + /// Constructs a new [`RawDoc`], validating _only_ the /// following invariants: /// * `data` is at least five bytes long (the minimum for a valid BSON document) /// * the initial four bytes of `data` accurately represent the length of the bytes as @@ -74,7 +74,7 @@ impl RawDoc { /// Note that the internal structure of the bytes representing the /// BSON elements is _not_ validated at all by this method. If the /// bytes do not conform to the BSON spec, then method calls on - /// the RawDocument will return Errors where appropriate. + /// the [`RawDoc`] will return Errors where appropriate. /// /// ``` /// use bson::raw::RawDoc; @@ -131,7 +131,7 @@ impl RawDoc { unsafe { &*(data.as_ref() as *const [u8] as *const RawDoc) } } - /// Creates a new RawDocument with an owned copy of the BSON bytes. + /// Creates a new [`RawDocument`] with an owned copy of the BSON bytes. 
/// /// ``` /// use bson::raw::{RawDoc, RawDocument, Error}; From 260c245476801bc925248b4b673b83503049efb3 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 18 Oct 2021 13:10:34 -0400 Subject: [PATCH 47/48] bump proptest to 1.0.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3a35ff53..414ec8d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,7 +57,7 @@ serde_bytes = "0.11.5" assert_matches = "1.2" criterion = "0.3.0" pretty_assertions = "0.6.1" -proptest = "0.10" +proptest = "1.0.0" serde_bytes = "0.11" chrono = { version = "0.4", features = ["serde"] } From eecedc8c6cf02dd0bdc0a9f46d6201006eeaf409 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Fri, 22 Oct 2021 15:16:55 -0400 Subject: [PATCH 48/48] rename `RawDoc` -> `RawDocument`, `RawDocument` -> `RawDocumentBuf` --- src/lib.rs | 1 + src/raw/array.rs | 66 ++--- src/raw/bson.rs | 18 +- src/raw/doc.rs | 531 --------------------------------------- src/raw/document.rs | 522 +++++++++++++++++++++++++++++++------- src/raw/document_buf.rs | 197 +++++++++++++++ src/raw/error.rs | 4 +- src/raw/iter.rs | 19 +- src/raw/mod.rs | 50 ++-- src/raw/test/mod.rs | 48 ++-- src/tests/spec/corpus.rs | 8 +- 11 files changed, 735 insertions(+), 729 deletions(-) delete mode 100644 src/raw/doc.rs create mode 100644 src/raw/document_buf.rs diff --git a/src/lib.rs b/src/lib.rs index d7aca074..d9379dca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -280,6 +280,7 @@ pub use self::{ Deserializer, }, decimal128::Decimal128, + raw::{RawDocument, RawDocumentBuf, RawArray}, ser::{to_bson, to_document, to_vec, Serializer}, uuid::{Uuid, UuidRepresentation}, }; diff --git a/src/raw/array.rs b/src/raw/array.rs index f6fd24ac..684a4a4c 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -6,33 +6,33 @@ use super::{ Iter, RawBinary, RawBson, - RawDoc, + RawDocument, RawRegex, Result, }; use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// A slice of a 
BSON document containing a BSON array value (akin to [`std::str`]). This can be -/// retrieved from a [`RawDoc`] via [`RawDoc::get`]. +/// retrieved from a [`RawDocument`] via [`RawDocument::get`]. /// /// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`. /// -/// Accessing elements within a [`RawArr`] is similar to element access in [`crate::Document`], +/// Accessing elements within a [`RawArray`] is similar to element access in [`crate::Document`], /// but because the contents are parsed during iteration instead of at creation time, format errors /// can happen at any time during use. /// -/// Iterating over a [`RawArr`] yields either an error or a value that borrows from the +/// Iterating over a [`RawArray`] yields either an error or a value that borrows from the /// original document without making any additional allocations. /// /// ``` -/// use bson::{doc, raw::RawDoc}; +/// use bson::{doc, raw::RawDocument}; /// /// let doc = doc! { /// "x": [1, true, "two", 5.5] /// }; /// let bytes = bson::to_vec(&doc)?; /// -/// let rawdoc = RawDoc::new(bytes.as_slice())?; +/// let rawdoc = RawDocument::new(bytes.as_slice())?; /// let rawarray = rawdoc.get_array("x")?; /// /// for v in rawarray { @@ -41,21 +41,21 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// # Ok::<(), Box>(()) /// ``` /// -/// Individual elements can be accessed using [`RawArr::get`] or any of -/// the type-specific getters, such as [`RawArr::get_object_id`] or -/// [`RawArr::get_str`]. Note that accessing elements is an O(N) operation, as it +/// Individual elements can be accessed using [`RawArray::get`] or any of +/// the type-specific getters, such as [`RawArray::get_object_id`] or +/// [`RawArray::get_str`]. Note that accessing elements is an O(N) operation, as it /// requires iterating through the array from the beginning to find the requested index. 
/// /// ``` /// # use bson::raw::{ValueAccessError}; -/// use bson::{doc, raw::RawDoc}; +/// use bson::{doc, raw::RawDocument}; /// /// let doc = doc! { /// "x": [1, true, "two", 5.5] /// }; /// let bytes = bson::to_vec(&doc)?; /// -/// let rawdoc = RawDoc::new(bytes.as_slice())?; +/// let rawdoc = RawDocument::new(bytes.as_slice())?; /// let rawarray = rawdoc.get_array("x")?; /// /// assert_eq!(rawarray.get_bool(1)?, true); @@ -63,22 +63,22 @@ use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; /// ``` #[derive(PartialEq)] #[repr(transparent)] -pub struct RawArr { - pub(crate) doc: RawDoc, +pub struct RawArray { + pub(crate) doc: RawDocument, } -impl RawArr { - pub(crate) fn from_doc(doc: &RawDoc) -> &RawArr { +impl RawArray { + pub(crate) fn from_doc(doc: &RawDocument) -> &RawArray { // SAFETY: // // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is // null, dangling, or misaligned. We know the pointer is not null or dangling due to the - // fact that it's created by a safe reference. Converting &RawDoc to *const - // RawDoc will be properly aligned due to them being references to the same type, - // and converting *const RawDoc to *const RawArr is aligned due to the fact that - // the only field in a RawArr is a RawDoc, meaning the structs are represented + // fact that it's created by a safe reference. Converting &RawDocument to *const + // RawDocument will be properly aligned due to them being references to the same type, + // and converting *const RawDocument to *const RawArray is aligned due to the fact that + // the only field in a RawArray is a RawDocument, meaning the structs are represented // identically at the byte level. - unsafe { &*(doc as *const RawDoc as *const RawArr) } + unsafe { &*(doc as *const RawDocument as *const RawArray) } } /// Gets a reference to the value at the given index. 
@@ -128,13 +128,13 @@ impl RawArr { /// Gets a reference to the document at the given index or returns an error if the /// value at that index isn't a document. - pub fn get_document(&self, index: usize) -> ValueAccessResult<&RawDoc> { + pub fn get_document(&self, index: usize) -> ValueAccessResult<&RawDocument> { self.get_with(index, ElementType::EmbeddedDocument, RawBson::as_document) } /// Gets a reference to the array at the given index or returns an error if the /// value at that index isn't a array. - pub fn get_array(&self, index: usize) -> ValueAccessResult<&RawArr> { + pub fn get_array(&self, index: usize) -> ValueAccessResult<&RawArray> { self.get_with(index, ElementType::Array, RawBson::as_array) } @@ -186,24 +186,24 @@ impl RawArr { self.get_with(index, ElementType::Int64, RawBson::as_i64) } - /// Gets a reference to the raw bytes of the RawArr. + /// Gets a reference to the raw bytes of the [`RawArray`]. pub fn as_bytes(&self) -> &[u8] { self.doc.as_bytes() } } -impl std::fmt::Debug for RawArr { +impl std::fmt::Debug for RawArray { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("RawArr") + f.debug_struct("RawArray") .field("data", &hex::encode(self.doc.as_bytes())) .finish() } } -impl TryFrom<&RawArr> for Vec { +impl TryFrom<&RawArray> for Vec { type Error = Error; - fn try_from(arr: &RawArr) -> Result> { + fn try_from(arr: &RawArray) -> Result> { arr.into_iter() .map(|result| { let rawbson = result?; @@ -213,23 +213,23 @@ impl TryFrom<&RawArr> for Vec { } } -impl<'a> IntoIterator for &'a RawArr { - type IntoIter = RawArrIter<'a>; +impl<'a> IntoIterator for &'a RawArray { + type IntoIter = RawArrayIter<'a>; type Item = Result>; - fn into_iter(self) -> RawArrIter<'a> { - RawArrIter { + fn into_iter(self) -> RawArrayIter<'a> { + RawArrayIter { inner: self.doc.into_iter(), } } } /// An iterator over borrowed raw BSON array values. 
-pub struct RawArrIter<'a> { +pub struct RawArrayIter<'a> { inner: Iter<'a>, } -impl<'a> Iterator for RawArrIter<'a> { +impl<'a> Iterator for RawArrayIter<'a> { type Item = Result>; fn next(&mut self) -> Option>> { diff --git a/src/raw/bson.rs b/src/raw/bson.rs index c2377e14..05ae4e19 100644 --- a/src/raw/bson.rs +++ b/src/raw/bson.rs @@ -1,6 +1,6 @@ use std::convert::{TryFrom, TryInto}; -use super::{Error, RawArr, RawDoc, Result}; +use super::{Error, RawArray, RawDocument, Result}; use crate::{ oid::{self, ObjectId}, spec::{BinarySubtype, ElementType}, @@ -18,9 +18,9 @@ pub enum RawBson<'a> { /// UTF-8 string String(&'a str), /// Array - Array(&'a RawArr), + Array(&'a RawArray), /// Embedded document - Document(&'a RawDoc), + Document(&'a RawDocument), /// Boolean value Boolean(bool), /// Null value @@ -103,18 +103,18 @@ impl<'a> RawBson<'a> { } } - /// Gets the [`crate::raw::RawArr`] that's referenced or returns `None` if the referenced value + /// Gets the [`RawArray`] that's referenced or returns `None` if the referenced value /// isn't a BSON array. - pub fn as_array(self) -> Option<&'a RawArr> { + pub fn as_array(self) -> Option<&'a RawArray> { match self { RawBson::Array(v) => Some(v), _ => None, } } - /// Gets the [`crate::raw::RawDoc`] that's referenced or returns `None` if the referenced value + /// Gets the [`RawDocument`] that's referenced or returns `None` if the referenced value /// isn't a BSON document. - pub fn as_document(self) -> Option<&'a RawDoc> { + pub fn as_document(self) -> Option<&'a RawDocument> { match self { RawBson::Document(v) => Some(v), _ => None, @@ -341,7 +341,7 @@ impl<'a> RawRegex<'a> { #[derive(Clone, Copy, Debug, PartialEq)] pub struct RawJavaScriptCodeWithScope<'a> { pub(crate) code: &'a str, - pub(crate) scope: &'a RawDoc, + pub(crate) scope: &'a RawDocument, } impl<'a> RawJavaScriptCodeWithScope<'a> { @@ -351,7 +351,7 @@ impl<'a> RawJavaScriptCodeWithScope<'a> { } /// Gets the scope in the value. 
- pub fn scope(self) -> &'a RawDoc { + pub fn scope(self) -> &'a RawDocument { self.scope } } diff --git a/src/raw/doc.rs b/src/raw/doc.rs deleted file mode 100644 index 7a7c594c..00000000 --- a/src/raw/doc.rs +++ /dev/null @@ -1,531 +0,0 @@ -use std::{ - borrow::Cow, - convert::{TryFrom, TryInto}, -}; - -use crate::{raw::error::ErrorKind, DateTime, Timestamp}; - -use super::{ - error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, - i32_from_slice, - Error, - Iter, - RawArr, - RawBinary, - RawBson, - RawDocument, - RawRegex, - Result, -}; -use crate::{oid::ObjectId, spec::ElementType, Document}; - -/// A slice of a BSON document (akin to [`std::str`]). This can be created from a -/// [`RawDocument`] or any type that contains valid BSON data, including static binary literals, -/// [Vec](std::vec::Vec), or arrays. -/// -/// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`. For an -/// owned version of this type, see [`RawDocument`]. -/// -/// Accessing elements within a [`RawDoc`] is similar to element access in [`crate::Document`], -/// but because the contents are parsed during iteration instead of at creation time, format errors -/// can happen at any time during use. -/// -/// Iterating over a [`RawDoc`] yields either an error or a key-value pair that borrows from the -/// original document without making any additional allocations. -/// ``` -/// # use bson::raw::{Error}; -/// use bson::raw::RawDoc; -/// -/// let doc = RawDoc::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; -/// let mut iter = doc.into_iter(); -/// let (key, value) = iter.next().unwrap()?; -/// assert_eq!(key, "hi"); -/// assert_eq!(value.as_str(), Some("y'all")); -/// assert!(iter.next().is_none()); -/// # Ok::<(), Error>(()) -/// ``` -/// -/// Individual elements can be accessed using [`RawDoc::get`] or any of -/// the type-specific getters, such as [`RawDoc::get_object_id`] or -/// [`RawDoc::get_str`]. 
Note that accessing elements is an O(N) operation, as it -/// requires iterating through the document from the beginning to find the requested key. -/// -/// ``` -/// use bson::raw::RawDoc; -/// -/// let doc = RawDoc::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; -/// assert_eq!(doc.get_str("hi")?, "y'all"); -/// # Ok::<(), Box>(()) -/// ``` -#[derive(PartialEq)] -#[repr(transparent)] -pub struct RawDoc { - data: [u8], -} - -impl RawDoc { - /// Constructs a new [`RawDoc`], validating _only_ the - /// following invariants: - /// * `data` is at least five bytes long (the minimum for a valid BSON document) - /// * the initial four bytes of `data` accurately represent the length of the bytes as - /// required by the BSON spec. - /// * the last byte of `data` is a 0 - /// - /// Note that the internal structure of the bytes representing the - /// BSON elements is _not_ validated at all by this method. If the - /// bytes do not conform to the BSON spec, then method calls on - /// the [`RawDoc`] will return Errors where appropriate. - /// - /// ``` - /// use bson::raw::RawDoc; - /// - /// let doc = RawDoc::new(b"\x05\0\0\0\0")?; - /// # Ok::<(), bson::raw::Error>(()) - /// ``` - pub fn new + ?Sized>(data: &D) -> Result<&RawDoc> { - let data = data.as_ref(); - - if data.len() < 5 { - return Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "document too short".into(), - }, - }); - } - - let length = i32_from_slice(data)?; - - if data.len() as i32 != length { - return Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "document length incorrect".into(), - }, - }); - } - - if data[data.len() - 1] != 0 { - return Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "document not null-terminated".into(), - }, - }); - } - - Ok(RawDoc::new_unchecked(data)) - } - - /// Creates a new Doc referencing the provided data slice. 
- pub(crate) fn new_unchecked + ?Sized>(data: &D) -> &RawDoc { - // SAFETY: - // - // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is - // null, dangling, or misaligned. We know the pointer is not null or dangling due to the - // fact that it's created by a safe reference. Converting &[u8] to *const [u8] will be - // properly aligned due to them being references to the same type, and converting *const - // [u8] to *const RawDoc is aligned due to the fact that the only field in a - // RawDoc is a [u8] and it is #[repr(transparent), meaning the structs are represented - // identically at the byte level. - unsafe { &*(data.as_ref() as *const [u8] as *const RawDoc) } - } - - /// Creates a new [`RawDocument`] with an owned copy of the BSON bytes. - /// - /// ``` - /// use bson::raw::{RawDoc, RawDocument, Error}; - /// - /// let data = b"\x05\0\0\0\0"; - /// let doc_ref = RawDoc::new(data)?; - /// let doc: RawDocument = doc_ref.to_raw_document(); - /// # Ok::<(), Error>(()) - pub fn to_raw_document(&self) -> RawDocument { - // unwrap is ok here because we already verified the bytes in `RawDocumentRef::new` - RawDocument::new(self.data.to_owned()).unwrap() - } - - /// Gets a reference to the value corresponding to the given key by iterating until the key is - /// found. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, oid::ObjectId, raw::{RawDocument, RawBson}}; - /// - /// let doc = RawDocument::from_document(&doc! 
{ - /// "_id": ObjectId::new(), - /// "f64": 2.5, - /// })?; - /// - /// let element = doc.get("f64")?.expect("finding key f64"); - /// assert_eq!(element.as_f64(), Some(2.5)); - /// assert!(doc.get("unknown")?.is_none()); - /// # Ok::<(), Error>(()) - /// ``` - pub fn get(&self, key: impl AsRef) -> Result>> { - for result in self.into_iter() { - let (k, v) = result?; - if key.as_ref() == k { - return Ok(Some(v)); - } - } - Ok(None) - } - - fn get_with<'a, T>( - &'a self, - key: impl AsRef, - expected_type: ElementType, - f: impl FnOnce(RawBson<'a>) -> Option, - ) -> ValueAccessResult { - let key = key.as_ref(); - - let bson = self - .get(key) - .map_err(|e| ValueAccessError { - key: key.to_string(), - kind: ValueAccessErrorKind::InvalidBson(e), - })? - .ok_or(ValueAccessError { - key: key.to_string(), - kind: ValueAccessErrorKind::NotPresent, - })?; - match f(bson) { - Some(t) => Ok(t), - None => Err(ValueAccessError { - key: key.to_string(), - kind: ValueAccessErrorKind::UnexpectedType { - expected: expected_type, - actual: bson.element_type(), - }, - }), - } - } - - /// Gets a reference to the BSON double value corresponding to a given key or returns an error - /// if the key corresponds to a value which isn't a double. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::raw::{ValueAccessErrorKind, RawDocument}; - /// use bson::doc; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "bool": true, - /// "f64": 2.5, - /// })?; - /// - /// assert_eq!(doc.get_f64("f64")?, 2.5); - /// assert!(matches!(doc.get_f64("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); - /// assert!(matches!(doc.get_f64("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_f64(&self, key: impl AsRef) -> ValueAccessResult { - self.get_with(key, ElementType::Double, RawBson::as_f64) - } - - /// Gets a reference to the string value corresponding to a given key or returns an error if the - /// key corresponds to a value which isn't a string. - /// - /// ``` - /// use bson::{doc, raw::{RawDocument, ValueAccessErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "string": "hello", - /// "bool": true, - /// })?; - /// - /// assert_eq!(doc.get_str("string")?, "hello"); - /// assert!(matches!(doc.get_str("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); - /// assert!(matches!(doc.get_str("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_str(&self, key: impl AsRef) -> ValueAccessResult<&'_ str> { - self.get_with(key, ElementType::String, RawBson::as_str) - } - - /// Gets a reference to the document value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a document. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocument}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "doc": { "key": "value"}, - /// "bool": true, - /// })?; - /// - /// assert_eq!(doc.get_document("doc")?.get_str("key")?, "value"); - /// assert!(matches!(doc.get_document("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); - /// assert!(matches!(doc.get_document("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_document(&self, key: impl AsRef) -> ValueAccessResult<&'_ RawDoc> { - self.get_with(key, ElementType::EmbeddedDocument, RawBson::as_document) - } - - /// Gets a reference to the array value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't an array. - /// - /// ``` - /// use bson::{doc, raw::{RawDocument, ValueAccessErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "array": [true, 3], - /// "bool": true, - /// })?; - /// - /// let mut arr_iter = doc.get_array("array")?.into_iter(); - /// let _: bool = arr_iter.next().unwrap()?.as_bool().unwrap(); - /// let _: i32 = arr_iter.next().unwrap()?.as_i32().unwrap(); - /// - /// assert!(arr_iter.next().is_none()); - /// assert!(doc.get_array("bool").is_err()); - /// assert!(matches!(doc.get_array("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_array(&self, key: impl AsRef) -> ValueAccessResult<&'_ RawArr> { - self.get_with(key, ElementType::Array, RawBson::as_array) - } - - /// Gets a reference to the BSON binary value corresponding to a given key or returns an error - /// if the key corresponds to a value which isn't a binary value. - /// - /// ``` - /// use bson::{ - /// doc, - /// raw::{ValueAccessErrorKind, RawDocument, RawBinary}, - /// spec::BinarySubtype, - /// Binary, - /// }; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, - /// "bool": true, - /// })?; - /// - /// assert_eq!(doc.get_binary("binary")?.as_bytes(), &[1, 2, 3][..]); - /// assert!(matches!(doc.get_binary("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); - /// assert!(matches!(doc.get_binary("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_binary(&self, key: impl AsRef) -> ValueAccessResult> { - self.get_with(key, ElementType::Binary, RawBson::as_binary) - } - - /// Gets a reference to the ObjectId value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't an ObjectId. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, oid::ObjectId, raw::{ValueAccessErrorKind, RawDocument}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "_id": ObjectId::new(), - /// "bool": true, - /// })?; - /// - /// let oid = doc.get_object_id("_id")?; - /// assert!(matches!(doc.get_object_id("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); - /// assert!(matches!(doc.get_object_id("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_object_id(&self, key: impl AsRef) -> ValueAccessResult { - self.get_with(key, ElementType::ObjectId, RawBson::as_object_id) - } - - /// Gets a reference to the boolean value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a boolean. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, oid::ObjectId, raw::{RawDocument, ValueAccessErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "_id": ObjectId::new(), - /// "bool": true, - /// })?; - /// - /// assert!(doc.get_bool("bool")?); - /// assert!(matches!(doc.get_bool("_id").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); - /// assert!(matches!(doc.get_bool("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_bool(&self, key: impl AsRef) -> ValueAccessResult { - self.get_with(key, ElementType::Boolean, RawBson::as_bool) - } - - /// Gets a reference to the BSON DateTime value corresponding to a given key or returns an - /// error if the key corresponds to a value which isn't a DateTime. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocument}, DateTime}; - /// - /// let dt = DateTime::now(); - /// let doc = RawDocument::from_document(&doc! { - /// "created_at": dt, - /// "bool": true, - /// })?; - /// - /// assert_eq!(doc.get_datetime("created_at")?, dt); - /// assert!(matches!(doc.get_datetime("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); - /// assert!(matches!(doc.get_datetime("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_datetime(&self, key: impl AsRef) -> ValueAccessResult { - self.get_with(key, ElementType::DateTime, RawBson::as_datetime) - } - - /// Gets a reference to the BSON regex value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a regex. - /// - /// ``` - /// use bson::{doc, Regex, raw::{RawDocument, ValueAccessErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "regex": Regex { - /// pattern: r"end\s*$".into(), - /// options: "i".into(), - /// }, - /// "bool": true, - /// })?; - /// - /// assert_eq!(doc.get_regex("regex")?.pattern(), r"end\s*$"); - /// assert_eq!(doc.get_regex("regex")?.options(), "i"); - /// assert!(matches!(doc.get_regex("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); - /// assert!(matches!(doc.get_regex("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_regex(&self, key: impl AsRef) -> ValueAccessResult> { - self.get_with(key, ElementType::RegularExpression, RawBson::as_regex) - } - - /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an - /// error if the key corresponds to a value which isn't a timestamp. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, Timestamp, raw::{RawDocument, ValueAccessErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "bool": true, - /// "ts": Timestamp { time: 649876543, increment: 9 }, - /// })?; - /// - /// let timestamp = doc.get_timestamp("ts")?; - /// - /// assert_eq!(timestamp.time, 649876543); - /// assert_eq!(timestamp.increment, 9); - /// assert!(matches!(doc.get_timestamp("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); - /// assert!(matches!(doc.get_timestamp("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_timestamp(&self, key: impl AsRef) -> ValueAccessResult { - self.get_with(key, ElementType::Timestamp, RawBson::as_timestamp) - } - - /// Gets a reference to the BSON int32 value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a 32-bit integer. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::{RawDocument, ValueAccessErrorKind}}; - /// - /// let doc = RawDocument::from_document(&doc! 
{ - /// "bool": true, - /// "i32": 1_000_000, - /// })?; - /// - /// assert_eq!(doc.get_i32("i32")?, 1_000_000); - /// assert!(matches!(doc.get_i32("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { ..})); - /// assert!(matches!(doc.get_i32("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_i32(&self, key: impl AsRef) -> ValueAccessResult { - self.get_with(key, ElementType::Int32, RawBson::as_i32) - } - - /// Gets a reference to the BSON int64 value corresponding to a given key or returns an error if - /// the key corresponds to a value which isn't a 64-bit integer. - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocument}}; - /// - /// let doc = RawDocument::from_document(&doc! { - /// "bool": true, - /// "i64": 9223372036854775807_i64, - /// })?; - /// - /// assert_eq!(doc.get_i64("i64")?, 9223372036854775807); - /// assert!(matches!(doc.get_i64("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); - /// assert!(matches!(doc.get_i64("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); - /// # Ok::<(), Box>(()) - /// ``` - pub fn get_i64(&self, key: impl AsRef) -> ValueAccessResult { - self.get_with(key, ElementType::Int64, RawBson::as_i64) - } - - /// Return a reference to the contained data as a `&[u8]` - /// - /// ``` - /// # use bson::raw::Error; - /// use bson::{doc, raw::RawDocument}; - /// let docbuf = RawDocument::from_document(&doc!{})?; - /// assert_eq!(docbuf.as_bytes(), b"\x05\x00\x00\x00\x00"); - /// # Ok::<(), Error>(()) - /// ``` - pub fn as_bytes(&self) -> &[u8] { - &self.data - } -} - -impl std::fmt::Debug for RawDoc { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("RawDoc") - .field("data", &hex::encode(&self.data)) - .finish() - } -} - -impl AsRef for RawDoc { - fn as_ref(&self) -> &RawDoc { - self - } -} - -impl ToOwned for RawDoc { - type Owned = RawDocument; - - fn to_owned(&self) -> Self::Owned { - self.to_raw_document() - } -} - -impl<'a> From<&'a RawDoc> for Cow<'a, RawDoc> { - fn from(rdr: &'a RawDoc) -> Self { - Cow::Borrowed(rdr) - } -} - -impl TryFrom<&RawDoc> for crate::Document { - type Error = Error; - - fn try_from(rawdoc: &RawDoc) -> Result { - rawdoc - .into_iter() - .map(|res| res.and_then(|(k, v)| Ok((k.to_owned(), v.try_into()?)))) - .collect() - } -} - -impl<'a> IntoIterator for &'a RawDoc { - type IntoIter = Iter<'a>; - type Item = Result<(&'a str, RawBson<'a>)>; - - fn into_iter(self) -> Iter<'a> { - Iter::new(self) - } -} diff --git a/src/raw/document.rs b/src/raw/document.rs index 71948664..e2141bcc 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -1,15 +1,30 @@ use std::{ - borrow::{Borrow, Cow}, - convert::TryFrom, - ops::Deref, + borrow::Cow, + convert::{TryFrom, TryInto}, }; -use crate::Document; +use crate::{raw::error::ErrorKind, DateTime, Timestamp}; -use super::{Error, ErrorKind, Iter, RawBson, RawDoc, Result}; +use super::{ + 
error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, + i32_from_slice, + Error, + Iter, + RawArray, + RawBinary, + RawBson, + RawDocumentBuf, + RawRegex, + Result, +}; +use crate::{oid::ObjectId, spec::ElementType, Document}; -/// A BSON document, stored as raw bytes on the heap. This can be created from a `Vec` or -/// a [`crate::Document`]. +/// A slice of a BSON document (akin to [`std::str`]). This can be created from a +/// [`RawDocumentBuf`] or any type that contains valid BSON data, including static binary literals, +/// [Vec](std::vec::Vec), or arrays. +/// +/// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`. For an +/// owned version of this type, see [`RawDocumentBuf`]. /// /// Accessing elements within a [`RawDocument`] is similar to element access in [`crate::Document`], /// but because the contents are parsed during iteration instead of at creation time, format errors @@ -17,13 +32,12 @@ use super::{Error, ErrorKind, Iter, RawBson, RawDoc, Result}; /// /// Iterating over a [`RawDocument`] yields either an error or a key-value pair that borrows from /// the original document without making any additional allocations. -/// /// ``` -/// # use bson::raw::Error; +/// # use bson::raw::{Error}; /// use bson::raw::RawDocument; /// -/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; -/// let mut iter = doc.iter(); +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; +/// let mut iter = doc.into_iter(); /// let (key, value) = iter.next().unwrap()?; /// assert_eq!(key, "hi"); /// assert_eq!(value.as_str(), Some("y'all")); @@ -31,26 +45,26 @@ use super::{Error, ErrorKind, Iter, RawBson, RawDoc, Result}; /// # Ok::<(), Error>(()) /// ``` /// -/// This type implements `Deref` to `RawDoc`, meaning that all methods on `RawDoc` slices are -/// available on `RawDocument` values as well. 
This includes [`RawDoc::get`] or any of the -/// type-specific getters, such as [`RawDoc::get_object_id`] or [`RawDoc::get_str`]. Note that -/// accessing elements is an O(N) operation, as it requires iterating through the document from the -/// beginning to find the requested key. +/// Individual elements can be accessed using [`RawDocument::get`] or any of +/// the type-specific getters, such as [`RawDocument::get_object_id`] or +/// [`RawDocument::get_str`]. Note that accessing elements is an O(N) operation, as it +/// requires iterating through the document from the beginning to find the requested key. /// /// ``` /// use bson::raw::RawDocument; /// -/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; /// assert_eq!(doc.get_str("hi")?, "y'all"); /// # Ok::<(), Box>(()) /// ``` -#[derive(Clone, PartialEq)] +#[derive(PartialEq)] +#[repr(transparent)] pub struct RawDocument { - data: Vec, + data: [u8], } impl RawDocument { - /// Constructs a new RawDocument, validating _only_ the + /// Constructs a new [`RawDocument`], validating _only_ the /// following invariants: /// * `data` is at least five bytes long (the minimum for a valid BSON document) /// * the initial four bytes of `data` accurately represent the length of the bytes as @@ -60,82 +74,411 @@ impl RawDocument { /// Note that the internal structure of the bytes representing the /// BSON elements is _not_ validated at all by this method. If the /// bytes do not conform to the BSON spec, then method calls on - /// the RawDocument will return Errors where appropriate. + /// the [`RawDocument`] will return Errors where appropriate. 
/// /// ``` - /// # use bson::raw::{RawDocument, Error}; - /// let doc = RawDocument::new(b"\x05\0\0\0\0".to_vec())?; - /// # Ok::<(), Error>(()) + /// use bson::raw::RawDocument; + /// + /// let doc = RawDocument::new(b"\x05\0\0\0\0")?; + /// # Ok::<(), bson::raw::Error>(()) /// ``` - pub fn new(data: Vec) -> Result { - let _ = RawDoc::new(data.as_slice())?; - Ok(Self { data }) + pub fn new + ?Sized>(data: &D) -> Result<&RawDocument> { + let data = data.as_ref(); + + if data.len() < 5 { + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "document too short".into(), + }, + }); + } + + let length = i32_from_slice(data)?; + + if data.len() as i32 != length { + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "document length incorrect".into(), + }, + }); + } + + if data[data.len() - 1] != 0 { + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "document not null-terminated".into(), + }, + }); + } + + Ok(RawDocument::new_unchecked(data)) + } + + /// Creates a new `RawDocument` referencing the provided data slice. + pub(crate) fn new_unchecked + ?Sized>(data: &D) -> &RawDocument { + // SAFETY: + // + // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is + // null, dangling, or misaligned. We know the pointer is not null or dangling due to the + // fact that it's created by a safe reference. Converting &[u8] to *const [u8] will be + // properly aligned due to them being references to the same type, and converting *const + // [u8] to *const RawDocument is aligned due to the fact that the only field in a + // RawDocument is a [u8] and it is #[repr(transparent), meaning the structs are represented + // identically at the byte level. + unsafe { &*(data.as_ref() as *const [u8] as *const RawDocument) } } - /// Create a RawDocument from a Document. + /// Creates a new [`RawDocument`] with an owned copy of the BSON bytes. 
+ /// + /// ``` + /// use bson::raw::{RawDocument, RawDocumentBuf, Error}; + /// + /// let data = b"\x05\0\0\0\0"; + /// let doc_ref = RawDocument::new(data)?; + /// let doc: RawDocumentBuf = doc_ref.to_raw_document_buf(); + /// # Ok::<(), Error>(()) + pub fn to_raw_document_buf(&self) -> RawDocumentBuf { + // unwrap is ok here because we already verified the bytes in `RawDocumentRef::new` + RawDocumentBuf::new(self.data.to_owned()).unwrap() + } + + /// Gets a reference to the value corresponding to the given key by iterating until the key is + /// found. /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, oid::ObjectId, raw::RawDocument}; + /// use bson::{doc, oid::ObjectId, raw::{RawDocumentBuf, RawBson}}; /// - /// let document = doc! { + /// let doc = RawDocumentBuf::from_document(&doc! { /// "_id": ObjectId::new(), - /// "name": "Herman Melville", - /// "title": "Moby-Dick", - /// }; - /// let doc = RawDocument::from_document(&document)?; + /// "f64": 2.5, + /// })?; + /// + /// let element = doc.get("f64")?.expect("finding key f64"); + /// assert_eq!(element.as_f64(), Some(2.5)); + /// assert!(doc.get("unknown")?.is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn from_document(doc: &Document) -> Result { - let mut data = Vec::new(); - doc.to_writer(&mut data).map_err(|e| Error { - key: None, - kind: ErrorKind::MalformedValue { - message: e.to_string(), - }, - })?; + pub fn get(&self, key: impl AsRef) -> Result>> { + for result in self.into_iter() { + let (k, v) = result?; + if key.as_ref() == k { + return Ok(Some(v)); + } + } + Ok(None) + } + + fn get_with<'a, T>( + &'a self, + key: impl AsRef, + expected_type: ElementType, + f: impl FnOnce(RawBson<'a>) -> Option, + ) -> ValueAccessResult { + let key = key.as_ref(); - Ok(Self { data }) + let bson = self + .get(key) + .map_err(|e| ValueAccessError { + key: key.to_string(), + kind: ValueAccessErrorKind::InvalidBson(e), + })? 
+ .ok_or(ValueAccessError { + key: key.to_string(), + kind: ValueAccessErrorKind::NotPresent, + })?; + match f(bson) { + Some(t) => Ok(t), + None => Err(ValueAccessError { + key: key.to_string(), + kind: ValueAccessErrorKind::UnexpectedType { + expected: expected_type, + actual: bson.element_type(), + }, + }), + } } - /// Gets an iterator over the elements in the `RawDocument`, which yields `Result<&str, - /// Element<'_>>`. + /// Gets a reference to the BSON double value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a double. /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, raw::RawDocument}; + /// use bson::raw::{ValueAccessErrorKind, RawDocumentBuf}; + /// use bson::doc; /// - /// let doc = RawDocument::from_document(&doc! { "ferris": true })?; + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "bool": true, + /// "f64": 2.5, + /// })?; + /// + /// assert_eq!(doc.get_f64("f64")?, 2.5); + /// assert!(matches!(doc.get_f64("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_f64("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_f64(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Double, RawBson::as_f64) + } + + /// Gets a reference to the string value corresponding to a given key or returns an error if the + /// key corresponds to a value which isn't a string. + /// + /// ``` + /// use bson::{doc, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "string": "hello", + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_str("string")?, "hello"); + /// assert!(matches!(doc.get_str("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); + /// assert!(matches!(doc.get_str("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_str(&self, key: impl AsRef) -> ValueAccessResult<&'_ str> { + self.get_with(key, ElementType::String, RawBson::as_str) + } + + /// Gets a reference to the document value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a document. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocumentBuf}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "doc": { "key": "value"}, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_document("doc")?.get_str("key")?, "value"); + /// assert!(matches!(doc.get_document("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_document("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_document(&self, key: impl AsRef) -> ValueAccessResult<&'_ RawDocument> { + self.get_with(key, ElementType::EmbeddedDocument, RawBson::as_document) + } + + /// Gets a reference to the array value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't an array. /// - /// for element in doc.iter() { - /// let (key, value) = element?; - /// assert_eq!(key, "ferris"); - /// assert_eq!(value.as_bool(), Some(true)); - /// } - /// # Ok::<(), Error>(()) /// ``` + /// use bson::{doc, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "array": [true, 3], + /// "bool": true, + /// })?; /// - /// # Note: + /// let mut arr_iter = doc.get_array("array")?.into_iter(); + /// let _: bool = arr_iter.next().unwrap()?.as_bool().unwrap(); + /// let _: i32 = arr_iter.next().unwrap()?.as_i32().unwrap(); /// - /// There is no owning iterator for [`RawDocument`]. 
If you need ownership over - /// elements that might need to allocate, you must explicitly convert - /// them to owned types yourself. - pub fn iter(&self) -> Iter<'_> { - self.into_iter() + /// assert!(arr_iter.next().is_none()); + /// assert!(doc.get_array("bool").is_err()); + /// assert!(matches!(doc.get_array("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_array(&self, key: impl AsRef) -> ValueAccessResult<&'_ RawArray> { + self.get_with(key, ElementType::Array, RawBson::as_array) } - /// Return the contained data as a `Vec` + /// Gets a reference to the BSON binary value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a binary value. + /// + /// ``` + /// use bson::{ + /// doc, + /// raw::{ValueAccessErrorKind, RawDocumentBuf, RawBinary}, + /// spec::BinarySubtype, + /// Binary, + /// }; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_binary("binary")?.as_bytes(), &[1, 2, 3][..]); + /// assert!(matches!(doc.get_binary("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_binary("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_binary(&self, key: impl AsRef) -> ValueAccessResult> { + self.get_with(key, ElementType::Binary, RawBson::as_binary) + } + + /// Gets a reference to the ObjectId value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't an ObjectId. /// /// ``` /// # use bson::raw::Error; - /// use bson::{doc, raw::RawDocument}; + /// use bson::{doc, oid::ObjectId, raw::{ValueAccessErrorKind, RawDocumentBuf}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! 
{ + /// "_id": ObjectId::new(), + /// "bool": true, + /// })?; /// - /// let doc = RawDocument::from_document(&doc!{})?; - /// assert_eq!(doc.into_vec(), b"\x05\x00\x00\x00\x00".to_vec()); + /// let oid = doc.get_object_id("_id")?; + /// assert!(matches!(doc.get_object_id("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_object_id("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_object_id(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::ObjectId, RawBson::as_object_id) + } + + /// Gets a reference to the boolean value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a boolean. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "_id": ObjectId::new(), + /// "bool": true, + /// })?; + /// + /// assert!(doc.get_bool("bool")?); + /// assert!(matches!(doc.get_bool("_id").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_bool("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_bool(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Boolean, RawBson::as_bool) + } + + /// Gets a reference to the BSON DateTime value corresponding to a given key or returns an + /// error if the key corresponds to a value which isn't a DateTime. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocumentBuf}, DateTime}; + /// + /// let dt = DateTime::now(); + /// let doc = RawDocumentBuf::from_document(&doc! 
{ + /// "created_at": dt, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_datetime("created_at")?, dt); + /// assert!(matches!(doc.get_datetime("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_datetime("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_datetime(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::DateTime, RawBson::as_datetime) + } + + /// Gets a reference to the BSON regex value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a regex. + /// + /// ``` + /// use bson::{doc, Regex, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "regex": Regex { + /// pattern: r"end\s*$".into(), + /// options: "i".into(), + /// }, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_regex("regex")?.pattern(), r"end\s*$"); + /// assert_eq!(doc.get_regex("regex")?.options(), "i"); + /// assert!(matches!(doc.get_regex("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_regex("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_regex(&self, key: impl AsRef) -> ValueAccessResult> { + self.get_with(key, ElementType::RegularExpression, RawBson::as_regex) + } + + /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an + /// error if the key corresponds to a value which isn't a timestamp. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, Timestamp, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! 
{ + /// "bool": true, + /// "ts": Timestamp { time: 649876543, increment: 9 }, + /// })?; + /// + /// let timestamp = doc.get_timestamp("ts")?; + /// + /// assert_eq!(timestamp.time, 649876543); + /// assert_eq!(timestamp.increment, 9); + /// assert!(matches!(doc.get_timestamp("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_timestamp("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_timestamp(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Timestamp, RawBson::as_timestamp) + } + + /// Gets a reference to the BSON int32 value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a 32-bit integer. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "bool": true, + /// "i32": 1_000_000, + /// })?; + /// + /// assert_eq!(doc.get_i32("i32")?, 1_000_000); + /// assert!(matches!(doc.get_i32("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { ..})); + /// assert!(matches!(doc.get_i32("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_i32(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Int32, RawBson::as_i32) + } + + /// Gets a reference to the BSON int64 value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a 64-bit integer. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocumentBuf}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! 
{ + /// "bool": true, + /// "i64": 9223372036854775807_i64, + /// })?; + /// + /// assert_eq!(doc.get_i64("i64")?, 9223372036854775807); + /// assert!(matches!(doc.get_i64("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_i64("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_i64(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Int64, RawBson::as_i64) + } + + /// Return a reference to the contained data as a `&[u8]` + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocumentBuf}; + /// let docbuf = RawDocumentBuf::from_document(&doc!{})?; + /// assert_eq!(docbuf.as_bytes(), b"\x05\x00\x00\x00\x00"); /// # Ok::<(), Error>(()) /// ``` - pub fn into_vec(self) -> Vec { - self.data + pub fn as_bytes(&self) -> &[u8] { + &self.data } } @@ -147,23 +490,34 @@ impl std::fmt::Debug for RawDocument { } } -impl<'a> From for Cow<'a, RawDoc> { - fn from(rd: RawDocument) -> Self { - Cow::Owned(rd) +impl AsRef for RawDocument { + fn as_ref(&self) -> &RawDocument { + self + } +} + +impl ToOwned for RawDocument { + type Owned = RawDocumentBuf; + + fn to_owned(&self) -> Self::Owned { + self.to_raw_document_buf() } } -impl<'a> From<&'a RawDocument> for Cow<'a, RawDoc> { - fn from(rd: &'a RawDocument) -> Self { - Cow::Borrowed(rd.as_ref()) +impl<'a> From<&'a RawDocument> for Cow<'a, RawDocument> { + fn from(rdr: &'a RawDocument) -> Self { + Cow::Borrowed(rdr) } } -impl TryFrom for Document { +impl TryFrom<&RawDocument> for crate::Document { type Error = Error; - fn try_from(raw: RawDocument) -> Result { - Document::try_from(raw.as_ref()) + fn try_from(rawdoc: &RawDocument) -> Result { + rawdoc + .into_iter() + .map(|res| res.and_then(|(k, v)| Ok((k.to_owned(), v.try_into()?)))) + .collect() } } @@ -175,23 +529,3 @@ impl<'a> IntoIterator for &'a RawDocument { Iter::new(self) } } - -impl AsRef for RawDocument { - fn 
as_ref(&self) -> &RawDoc { - RawDoc::new_unchecked(&self.data) - } -} - -impl Deref for RawDocument { - type Target = RawDoc; - - fn deref(&self) -> &Self::Target { - RawDoc::new_unchecked(&self.data) - } -} - -impl Borrow for RawDocument { - fn borrow(&self) -> &RawDoc { - &*self - } -} diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs new file mode 100644 index 00000000..019a6e25 --- /dev/null +++ b/src/raw/document_buf.rs @@ -0,0 +1,197 @@ +use std::{ + borrow::{Borrow, Cow}, + convert::TryFrom, + ops::Deref, +}; + +use crate::Document; + +use super::{Error, ErrorKind, Iter, RawBson, RawDocument, Result}; + +/// An owned BSON document (akin to [`std::path::PathBuf`]), backed by a buffer of raw BSON bytes. +/// This can be created from a `Vec` or a [`crate::Document`]. +/// +/// Accessing elements within a [`RawDocumentBuf`] is similar to element access in +/// [`crate::Document`], but because the contents are parsed during iteration instead of at creation +/// time, format errors can happen at any time during use. +/// +/// Iterating over a [`RawDocumentBuf`] yields either an error or a key-value pair that borrows from +/// the original document without making any additional allocations. +/// +/// ``` +/// # use bson::raw::Error; +/// use bson::raw::RawDocumentBuf; +/// +/// let doc = RawDocumentBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// let mut iter = doc.iter(); +/// let (key, value) = iter.next().unwrap()?; +/// assert_eq!(key, "hi"); +/// assert_eq!(value.as_str(), Some("y'all")); +/// assert!(iter.next().is_none()); +/// # Ok::<(), Error>(()) +/// ``` +/// +/// This type implements `Deref` to [`RawDocument`], meaning that all methods on [`RawDocument`] are +/// available on [`RawDocumentBuf`] values as well. This includes [`RawDocument::get`] or any of the +/// type-specific getters, such as [`RawDocument::get_object_id`] or [`RawDocument::get_str`]. 
Note +/// that accessing elements is an O(N) operation, as it requires iterating through the document from +/// the beginning to find the requested key. +/// +/// ``` +/// use bson::raw::RawDocumentBuf; +/// +/// let doc = RawDocumentBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// assert_eq!(doc.get_str("hi")?, "y'all"); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, PartialEq)] +pub struct RawDocumentBuf { + data: Vec, +} + +impl RawDocumentBuf { + /// Constructs a new [`RawDocumentBuf`], validating _only_ the + /// following invariants: + /// * `data` is at least five bytes long (the minimum for a valid BSON document) + /// * the initial four bytes of `data` accurately represent the length of the bytes as + /// required by the BSON spec. + /// * the last byte of `data` is a 0 + /// + /// Note that the internal structure of the bytes representing the + /// BSON elements is _not_ validated at all by this method. If the + /// bytes do not conform to the BSON spec, then method calls on + /// the RawDocument will return Errors where appropriate. + /// + /// ``` + /// # use bson::raw::{RawDocumentBuf, Error}; + /// let doc = RawDocumentBuf::new(b"\x05\0\0\0\0".to_vec())?; + /// # Ok::<(), Error>(()) + /// ``` + pub fn new(data: Vec) -> Result { + let _ = RawDocument::new(data.as_slice())?; + Ok(Self { data }) + } + + /// Create a [`RawDocumentBuf`] from a [`Document`]. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::RawDocumentBuf}; + /// + /// let document = doc! 
{ + /// "_id": ObjectId::new(), + /// "name": "Herman Melville", + /// "title": "Moby-Dick", + /// }; + /// let doc = RawDocumentBuf::from_document(&document)?; + /// # Ok::<(), Error>(()) + /// ``` + pub fn from_document(doc: &Document) -> Result { + let mut data = Vec::new(); + doc.to_writer(&mut data).map_err(|e| Error { + key: None, + kind: ErrorKind::MalformedValue { + message: e.to_string(), + }, + })?; + + Ok(Self { data }) + } + + /// Gets an iterator over the elements in the [`RawDocumentBuf`], which yields + /// `Result<(&str, RawBson<'_>)>`. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocumentBuf}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { "ferris": true })?; + /// + /// for element in doc.iter() { + /// let (key, value) = element?; + /// assert_eq!(key, "ferris"); + /// assert_eq!(value.as_bool(), Some(true)); + /// } + /// # Ok::<(), Error>(()) + /// ``` + /// + /// # Note: + /// + /// There is no owning iterator for [`RawDocumentBuf`]. If you need ownership over + /// elements that might need to allocate, you must explicitly convert + /// them to owned types yourself. 
+ pub fn iter(&self) -> Iter<'_> { + self.into_iter() + } + + /// Return the contained data as a `Vec` + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocumentBuf}; + /// + /// let doc = RawDocumentBuf::from_document(&doc!{})?; + /// assert_eq!(doc.into_vec(), b"\x05\x00\x00\x00\x00".to_vec()); + /// # Ok::<(), Error>(()) + /// ``` + pub fn into_vec(self) -> Vec { + self.data + } +} + +impl std::fmt::Debug for RawDocumentBuf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RawDocumentBuf") + .field("data", &hex::encode(&self.data)) + .finish() + } +} + +impl<'a> From for Cow<'a, RawDocument> { + fn from(rd: RawDocumentBuf) -> Self { + Cow::Owned(rd) + } +} + +impl<'a> From<&'a RawDocumentBuf> for Cow<'a, RawDocument> { + fn from(rd: &'a RawDocumentBuf) -> Self { + Cow::Borrowed(rd.as_ref()) + } +} + +impl TryFrom for Document { + type Error = Error; + + fn try_from(raw: RawDocumentBuf) -> Result { + Document::try_from(raw.as_ref()) + } +} + +impl<'a> IntoIterator for &'a RawDocumentBuf { + type IntoIter = Iter<'a>; + type Item = Result<(&'a str, RawBson<'a>)>; + + fn into_iter(self) -> Iter<'a> { + Iter::new(self) + } +} + +impl AsRef for RawDocumentBuf { + fn as_ref(&self) -> &RawDocument { + RawDocument::new_unchecked(&self.data) + } +} + +impl Deref for RawDocumentBuf { + type Target = RawDocument; + + fn deref(&self) -> &Self::Target { + RawDocument::new_unchecked(&self.data) + } +} + +impl Borrow for RawDocumentBuf { + fn borrow(&self) -> &RawDocument { + &*self + } +} diff --git a/src/raw/error.rs b/src/raw/error.rs index c6f3fefe..556b7fa0 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -79,7 +79,7 @@ pub(crate) fn try_with_key Result>(key: impl AsRef, f: pub type ValueAccessResult = std::result::Result; /// Error to indicate that either a value was empty or it contained an unexpected -/// type, for use with the direct getters (e.g. [`crate::raw::RawDoc::get_str`]). 
+/// type, for use with the direct getters (e.g. [`crate::RawDocument::get_str`]). #[derive(Debug, PartialEq, Clone)] #[non_exhaustive] pub struct ValueAccessError { @@ -97,7 +97,7 @@ impl ValueAccessError { } } -/// The type of error encountered when using a direct getter (e.g. [`crate::raw::RawDoc::get_str`]). +/// The type of error encountered when using a direct getter (e.g. [`crate::RawDocument::get_str`]). #[derive(Debug, PartialEq, Clone)] #[non_exhaustive] pub enum ValueAccessErrorKind { diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 113857a5..2e46da52 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -19,17 +19,17 @@ use super::{ i64_from_slice, read_lenencoded, read_nullterminated, - RawArr, + RawArray, RawBinary, RawBson, - RawDoc, + RawDocument, RawJavaScriptCodeWithScope, RawRegex, }; /// An iterator over the document's entries. pub struct Iter<'a> { - doc: &'a RawDoc, + doc: &'a RawDocument, offset: usize, /// Whether the underlying doc is assumed to be valid or if an error has been encountered. @@ -38,7 +38,7 @@ pub struct Iter<'a> { } impl<'a> Iter<'a> { - pub(crate) fn new(doc: &'a RawDoc) -> Self { + pub(crate) fn new(doc: &'a RawDocument) -> Self { Self { doc, offset: 4, @@ -70,7 +70,7 @@ impl<'a> Iter<'a> { Ok(oid) } - fn next_document(&self, starting_at: usize) -> Result<&'a RawDoc> { + fn next_document(&self, starting_at: usize) -> Result<&'a RawDocument> { self.verify_enough_bytes(starting_at, MIN_BSON_DOCUMENT_SIZE as usize)?; let size = i32_from_slice(&self.doc.as_bytes()[starting_at..])? 
as usize; @@ -91,7 +91,7 @@ impl<'a> Iter<'a> { }, }); } - RawDoc::new(&self.doc.as_bytes()[starting_at..end]) + RawDocument::new(&self.doc.as_bytes()[starting_at..end]) } } @@ -167,7 +167,10 @@ impl<'a> Iterator for Iter<'a> { } ElementType::Array => { let doc = self.next_document(valueoffset)?; - (RawBson::Array(RawArr::from_doc(doc)), doc.as_bytes().len()) + ( + RawBson::Array(RawArray::from_doc(doc)), + doc.as_bytes().len(), + ) } ElementType::Binary => { let len = i32_from_slice(&self.doc.as_bytes()[valueoffset..])? as usize; @@ -260,7 +263,7 @@ impl<'a> Iterator for Iter<'a> { let slice = &&self.doc.as_bytes()[valueoffset..(valueoffset + length)]; let code = read_lenencoded(&slice[4..])?; let scope_start = 4 + 4 + code.len() + 1; - let scope = RawDoc::new(&slice[scope_start..])?; + let scope = RawDocument::new(&slice[scope_start..])?; ( RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope { code, diff --git a/src/raw/mod.rs b/src/raw/mod.rs index b93c7d1d..59f36595 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1,27 +1,29 @@ //! An API for interacting with raw BSON bytes. //! -//! This module provides two document types, [`RawDocument`] and [`RawDoc`] (akin to +//! This module provides two document types, [`RawDocumentBuf`] and [`RawDocument`] (akin to //! [`std::string::String`] and [`str`]), for working with raw BSON documents. These types differ //! from the regular [`crate::Document`] type in that their storage is BSON bytes rather than a //! hash-map like Rust type. In certain circumstances, these types can be leveraged for increased //! performance. //! //! This module also provides a [`RawBson`] type for modeling any borrowed BSON element and a -//! [`RawArr`] type for modeling a borrowed slice of a document containing a BSON array element. +//! [`RawArray`] type for modeling a borrowed slice of a document containing a BSON array element. //! -//! A [`RawDocument`] can be created from a `Vec` containing raw BSON data, and elements -//! 
accessed via methods similar to those available on the [`crate::Document`] type. Note that -//! [`RawDoc::get`] returns a [`Result>`], since the bytes contained in -//! the document are not fully validated until trying to access the contained data. +//! A [`RawDocumentBuf`] can be created from a `Vec<u8>` containing raw BSON data. A +//! [`RawDocument`] can be created from anything that can be borrowed as a `&[u8]`. Both types +//! can access elements via methods similar to those available on the [`crate::Document`] type. +//! Note that [`RawDocument::get`] (which [`RawDocumentBuf`] calls through to via its `Deref` +//! implementation) returns a `Result`, since the bytes contained in the document are not fully +//! validated until trying to access the contained data. //! //! ```rust //! use bson::raw::{ //! RawBson, -//! RawDocument, +//! RawDocumentBuf, //! }; //! //! // See http://bsonspec.org/spec.html for details on the binary encoding of BSON. -//! let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +//! let doc = RawDocumentBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; //! let elem = doc.get("hi")?.unwrap(); //! //! assert_eq!( @@ -39,7 +41,7 @@ //! //! ```rust //! use bson::{ -//! raw::RawDocument, +//! raw::RawDocumentBuf, //! doc, //! }; //! @@ -49,7 +51,7 @@ //! } //! }; //! -//! let raw = RawDocument::from_document(&document)?; +//! let raw = RawDocumentBuf::from_document(&document)?; //! let value = raw //! .get_document("goodbye")? //! .get_str("cruel")?; @@ -61,33 +63,33 @@ //! # Ok::<(), Box>(()) //! ``` //! -//! ### Reference types ([`RawDoc`]) +//! ### Reference type ([`RawDocument`]) //! -//! A BSON document can also be accessed with the [`RawDoc`] type, which is an +//! A BSON document can also be accessed with the [`RawDocument`] type, which is an //! unsized type that represents the BSON payload as a `[u8]`. This allows accessing nested -//! documents without reallocation.
[`RawDoc`] must always be accessed via a pointer type, -//! similarly to `[T]` and `str`. +//! documents without reallocation. [`RawDocument`] must always be accessed via a pointer type, +//! similar to `[T]` and `str`. //! //! The below example constructs a bson document in a stack-based array, -//! and extracts a &str from it, performing no heap allocation. +//! and extracts a `&str` from it, performing no heap allocation. //! ```rust -//! use bson::raw::RawDoc; +//! use bson::raw::RawDocument; //! //! let bytes = b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00"; -//! assert_eq!(RawDoc::new(bytes)?.get_str("hi")?, "y'all"); +//! assert_eq!(RawDocument::new(bytes)?.get_str("hi")?, "y'all"); //! # Ok::<(), Box>(()) //! ``` //! //! ### Iteration //! -//! [`RawDoc`] implements [`IntoIterator`](std::iter::IntoIterator), which can also be -//! accessed via [`RawDocument::iter`]. +//! [`RawDocument`] implements [`IntoIterator`](std::iter::IntoIterator), which can also be +//! accessed via [`RawDocumentBuf::iter`]. //! ```rust //! use bson::{ //! raw::{ //! RawBson, -//! RawDocument, +//! RawDocumentBuf, //! }, //! doc, //! }; @@ -97,7 +99,7 @@ //! "year": "2021", //! }; //! -//! let doc = RawDocument::from_document(&original_doc)?; +//! let doc = RawDocumentBuf::from_document(&original_doc)?; //! let mut doc_iter = doc.iter(); //! //! 
let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?; @@ -112,8 +114,8 @@ mod array; mod bson; -mod doc; mod document; +mod document_buf; mod error; mod iter; #[cfg(test)] @@ -124,10 +126,10 @@ use std::convert::{TryFrom, TryInto}; use crate::de::MIN_BSON_STRING_SIZE; pub use self::{ - array::{RawArr, RawArrIter}, + array::{RawArray, RawArrayIter}, bson::{RawBinary, RawBson, RawDbPointer, RawJavaScriptCodeWithScope, RawRegex}, - doc::RawDoc, document::RawDocument, + document_buf::RawDocumentBuf, error::{Error, ErrorKind, Result, ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, iter::Iter, }; diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index 8bbc97d6..234b326d 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -28,7 +28,7 @@ fn string_from_document() { "that": "second", "something": "else", }); - let rawdoc = RawDoc::new(&docbytes).unwrap(); + let rawdoc = RawDocument::new(&docbytes).unwrap(); assert_eq!( rawdoc.get("that").unwrap().unwrap().as_str().unwrap(), "second", @@ -43,7 +43,7 @@ fn nested_document() { "i64": 6_i64, }, }); - let rawdoc = RawDoc::new(&docbytes).unwrap(); + let rawdoc = RawDocument::new(&docbytes).unwrap(); let subdoc = rawdoc .get("outer") .expect("get doc result") @@ -78,7 +78,7 @@ fn iterate() { "peanut butter": "chocolate", "easy as": {"do": 1, "re": 2, "mi": 3}, }); - let rawdoc = RawDoc::new(&docbytes).expect("malformed bson document"); + let rawdoc = RawDocument::new(&docbytes).expect("malformed bson document"); let mut dociter = rawdoc.into_iter(); let next = dociter.next().expect("no result").expect("invalid bson"); assert_eq!(next.0, "apples"); @@ -115,7 +115,7 @@ fn rawdoc_to_doc() { "end": "END", }); - let rawdoc = RawDoc::new(&docbytes).expect("invalid document"); + let rawdoc = RawDocument::new(&docbytes).expect("invalid document"); let doc: crate::Document = rawdoc.try_into().expect("invalid bson"); let round_tripped_bytes = crate::to_vec(&doc).expect("serialize should work"); 
assert_eq!(round_tripped_bytes, docbytes); @@ -130,7 +130,7 @@ fn rawdoc_to_doc() { fn f64() { #![allow(clippy::float_cmp)] - let rawdoc = RawDocument::from_document(&doc! { "f64": 2.5 }).unwrap(); + let rawdoc = RawDocumentBuf::from_document(&doc! { "f64": 2.5 }).unwrap(); assert_eq!( rawdoc .get("f64") @@ -144,7 +144,7 @@ fn f64() { #[test] fn string() { - let rawdoc = RawDocument::from_document(&doc! {"string": "hello"}).unwrap(); + let rawdoc = RawDocumentBuf::from_document(&doc! {"string": "hello"}).unwrap(); assert_eq!( rawdoc @@ -159,7 +159,7 @@ fn string() { #[test] fn document() { - let rawdoc = RawDocument::from_document(&doc! {"document": {}}).unwrap(); + let rawdoc = RawDocumentBuf::from_document(&doc! {"document": {}}).unwrap(); let doc = rawdoc .get("document") @@ -172,7 +172,7 @@ fn document() { #[test] fn array() { - let rawdoc = RawDocument::from_document( + let rawdoc = RawDocumentBuf::from_document( &doc! { "array": ["binary", "serialized", "object", "notation"]}, ) .unwrap(); @@ -193,7 +193,7 @@ fn array() { #[test] fn binary() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! { "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] } }) .unwrap(); @@ -209,7 +209,7 @@ fn binary() { #[test] fn object_id() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! { "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), }) .unwrap(); @@ -224,7 +224,7 @@ fn object_id() { #[test] fn boolean() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! { "boolean": true, }) .unwrap(); @@ -241,7 +241,7 @@ fn boolean() { #[test] fn datetime() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! 
{ "boolean": true, "datetime": DateTime::from_chrono(Utc.ymd(2000,10,31).and_hms(12, 30, 45)), }) @@ -257,7 +257,7 @@ fn datetime() { #[test] fn null() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! { "null": null, }) .unwrap(); @@ -271,7 +271,7 @@ fn null() { #[test] fn regex() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! { "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), }).unwrap(); let regex = rawdoc @@ -285,7 +285,7 @@ fn regex() { } #[test] fn javascript() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! { "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), }) .unwrap(); @@ -300,7 +300,7 @@ fn javascript() { #[test] fn symbol() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! { "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), }) .unwrap(); @@ -316,7 +316,7 @@ fn symbol() { #[test] fn javascript_with_scope() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! { "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope { code: String::from("console.log(msg);"), scope: doc! { "ok": true } @@ -343,7 +343,7 @@ fn javascript_with_scope() { #[test] fn int32() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! { "int32": 23i32, }) .unwrap(); @@ -358,7 +358,7 @@ fn int32() { #[test] fn timestamp() { - let rawdoc = RawDocument::from_document(&doc! { + let rawdoc = RawDocumentBuf::from_document(&doc! { "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 7 }), }) .unwrap(); @@ -375,7 +375,7 @@ fn timestamp() { #[test] fn int64() { - let rawdoc = RawDocument::from_document(&doc! 
{ + let rawdoc = RawDocumentBuf::from_document(&doc! { "int64": 46i64, }) .unwrap(); @@ -408,7 +408,7 @@ fn document_iteration() { "int64": 46i64, "end": "END", }; - let rawdoc = RawDocument::from_document(&doc).unwrap(); + let rawdoc = RawDocumentBuf::from_document(&doc).unwrap(); let rawdocref = rawdoc.as_ref(); assert_eq!( @@ -439,7 +439,7 @@ fn into_bson_conversion() { "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, "boolean": false, }); - let rawbson = RawBson::Document(RawDoc::new(docbytes.as_slice()).unwrap()); + let rawbson = RawBson::Document(RawDocument::new(docbytes.as_slice()).unwrap()); let b: Bson = rawbson.try_into().expect("invalid bson"); let doc = b.as_document().expect("not a document"); assert_eq!(*doc.get("f64").expect("f64 not found"), Bson::Double(2.5)); @@ -486,14 +486,14 @@ use std::convert::TryInto; proptest! { #[test] fn no_crashes(s: Vec) { - let _ = RawDocument::new(s); + let _ = RawDocumentBuf::new(s); } #[test] fn roundtrip_bson(bson in arbitrary_bson()) { let doc = doc!{"bson": bson}; let raw = to_bytes(&doc); - let raw = RawDocument::new(raw); + let raw = RawDocumentBuf::new(raw); prop_assert!(raw.is_ok()); let raw = raw.unwrap(); let roundtrip: Result = raw.try_into(); diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index b2c45dac..9c6844dd 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -3,7 +3,7 @@ use std::{ str::FromStr, }; -use crate::{raw::RawDoc, tests::LOCK, Bson, Document}; +use crate::{raw::RawDocument, tests::LOCK, Bson, Document}; use pretty_assertions::assert_eq; use serde::Deserialize; @@ -79,7 +79,7 @@ fn run_test(test: TestFile) { let todocument_documentfromreader_cb: Document = crate::to_document(&documentfromreader_cb).expect(&description); - let document_from_raw_document: Document = RawDoc::new(canonical_bson.as_slice()) + let document_from_raw_document: Document = RawDocument::new(canonical_bson.as_slice()) .expect(&description) .try_into() 
.expect(&description); @@ -212,7 +212,7 @@ fn run_test(test: TestFile) { description, ); - let document_from_raw_document: Document = RawDoc::new(db.as_slice()) + let document_from_raw_document: Document = RawDocument::new(db.as_slice()) .expect(&description) .try_into() .expect(&description); @@ -402,7 +402,7 @@ fn run_test(test: TestFile) { ); let bson = hex::decode(&decode_error.bson).expect("should decode from hex"); - if let Ok(doc) = RawDoc::new(bson.as_slice()) { + if let Ok(doc) = RawDocument::new(bson.as_slice()) { Document::try_from(doc).expect_err(description.as_str()); }