-
Notifications
You must be signed in to change notification settings - Fork 151
RUST-1992 Introduce the `&CStr` and `CString` types for keys and regular expressions
#563
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
718073e
d32ccc1
1c33eb5
1aa5d08
33b69c5
c7564b9
7944192
a6e60e5
b63cd15
fe810e9
2ac2cb7
6eeaec4
a4eb15c
df0729a
b5882bf
1d8f3ad
14e97c7
a4b4d3a
e6e57ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ use std::{ | |
}; | ||
|
||
use bson::{ | ||
cstr, | ||
doc, | ||
oid::ObjectId, | ||
spec::BinarySubtype, | ||
|
@@ -835,8 +836,8 @@ fn raw_regex() { | |
|
||
let bytes = bson::serialize_to_vec(&doc! { | ||
"r": Regex { | ||
pattern: "a[b-c]d".to_string(), | ||
options: "ab".to_string(), | ||
pattern: cstr!("a[b-c]d").into(), | ||
options: cstr!("ab").into(), | ||
}, | ||
}) | ||
.expect("raw_regex"); | ||
|
@@ -927,8 +928,8 @@ impl AllTypes { | |
}; | ||
let date = DateTime::now(); | ||
let regex = Regex { | ||
pattern: "hello".to_string(), | ||
options: "x".to_string(), | ||
pattern: cstr!("hello").into(), | ||
options: cstr!("x").into(), | ||
}; | ||
let timestamp = Timestamp { | ||
time: 123, | ||
|
@@ -1058,8 +1059,8 @@ fn all_raw_types_rmp() { | |
scope: doc! { "x": 1 }, | ||
}, | ||
"regex": Regex { | ||
pattern: "pattern".to_string(), | ||
options: "opt".to_string() | ||
pattern: cstr!("pattern").into(), | ||
options: cstr!("opt").into() | ||
} | ||
}) | ||
.unwrap(); | ||
|
@@ -1254,24 +1255,22 @@ fn owned_raw_types() { | |
|
||
let f = Foo { | ||
subdoc: RawDocumentBuf::from_iter([ | ||
("a key", RawBson::String("a value".to_string())), | ||
("an objectid", RawBson::ObjectId(oid)), | ||
("a date", RawBson::DateTime(dt)), | ||
(cstr!("a key"), RawBson::String("a value".to_string())), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Building from a key-value list is one place where the repeated There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. makes sense, seems like something we can add if there's user demand for it |
||
(cstr!("an objectid"), RawBson::ObjectId(oid)), | ||
(cstr!("a date"), RawBson::DateTime(dt)), | ||
( | ||
"code_w_scope", | ||
cstr!("code_w_scope"), | ||
RawBson::JavaScriptCodeWithScope(raw_code_w_scope.clone()), | ||
), | ||
("decimal128", RawBson::Decimal128(d128)), | ||
]) | ||
.unwrap(), | ||
(cstr!("decimal128"), RawBson::Decimal128(d128)), | ||
]), | ||
array: RawArrayBuf::from_iter([ | ||
RawBson::String("a string".to_string()), | ||
RawBson::ObjectId(oid), | ||
RawBson::DateTime(dt), | ||
RawBson::JavaScriptCodeWithScope(raw_code_w_scope), | ||
RawBson::Decimal128(d128), | ||
]) | ||
.unwrap(), | ||
]), | ||
}; | ||
|
||
let expected = doc! { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -240,12 +240,12 @@ macro_rules! rawbson { | |
|
||
// Finished with trailing comma. | ||
(@array [$($elems:expr,)*]) => { | ||
$crate::RawArrayBuf::from_iter(vec![$($elems,)*]).expect("invalid bson value") | ||
$crate::RawArrayBuf::from_iter(vec![$($elems,)*]) | ||
}; | ||
|
||
// Finished without trailing comma. | ||
(@array [$($elems:expr),*]) => { | ||
$crate::RawArrayBuf::from_iter(vec![$($elems),*]).expect("invalid bson value") | ||
$crate::RawArrayBuf::from_iter(vec![$($elems),*]) | ||
}; | ||
|
||
// Next element is `null`. | ||
|
@@ -291,15 +291,26 @@ macro_rules! rawbson { | |
// Finished. | ||
(@object $object:ident () () ()) => {}; | ||
|
||
// Insert the current entry followed by trailing comma. | ||
(@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => { | ||
$object.append(($($key)+), $value).expect("invalid bson value"); | ||
// Insert the current entry followed by a trailing comma, with a key literal. | |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tweaked the behavior of |
||
(@object $object:ident [$key:literal] ($value:expr) , $($rest:tt)*) => {{ | ||
$object.append($crate::raw::cstr!($key), $value); | ||
$crate::rawbson!(@object $object () ($($rest)*) ($($rest)*)); | ||
}}; | ||
|
||
// Insert the current entry followed by a trailing comma, with a key expression. | |
(@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {{ | ||
$object.append($($key)+, $value); | ||
$crate::rawbson!(@object $object () ($($rest)*) ($($rest)*)); | ||
}}; | ||
|
||
// Insert the last entry without trailing comma, with a key literal. | ||
(@object $object:ident [$key:literal] ($value:expr)) => { | ||
$object.append($crate::raw::cstr!($key), $value); | ||
}; | ||
|
||
// Insert the last entry without trailing comma. | ||
// Insert the last entry without trailing comma, with a key expression. | ||
(@object $object:ident [$($key:tt)+] ($value:expr)) => { | ||
$object.append(($($key)+), $value).expect("invalid bson value"); | ||
$object.append($($key)+, $value); | ||
}; | ||
|
||
// Next value is `null`. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This really is the whole story in this one line - the failure point has been shifted from writing bytes to the document buffer (`append` and everything that used it) to constructing the string, and that in turn can now be done either at run-time or at compile-time if it's just a literal.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be possible to provide support for the `c"string"` syntax here, and more generally interop with the equivalent `std::ffi` types? I think that would be slightly more ergonomic for string literals since it wouldn't require a macro import.

I'm also wondering how users can construct strings with interior null bytes - is that ever doable with a `&'static str`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately `std::ffi::CStr` and the `c"string"` syntax don't require that the text be valid UTF-8, so there'd still have to be a validation step at encode time if we used those :/. I added documentation to the bson `CStr` types to call out the differences (and make usage more clear).

Zero bytes are actually completely valid UTF-8 under normal circumstances (although why they'd be in there is anyone's guess), and can be constructed in string literals via `\0`:

prints
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For my understanding because my knowledge of these string details is pretty fuzzy - is there somewhere we're enforcing valid UTF-8 for `CStr`/`CString` upon construction? I see that we're checking for null bytes in `validate_cstr`, but don't understand how that's different from the constraints of a `std::ffi::CStr`. I do get a compiler error for something like:

So I'm wondering what would be valid input for `c"..."` but not `cstr!("...")`.

(I also missed the difference in inclusion/exclusion of the terminating null byte, thanks for adding that to the docs!)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need to explicitly validate UTF-8 for these types because they only accept `&str`/`String`, which are already required to be UTF-8 :). In the code where we're parsing things from a bytestream we've already got the validation code, and likewise any user constructing keys or regexes from strings will already have had to validate the data.

I added some examples to the rustdoc to make it more clear where the validation for `bson::raw::CStr` is more strict than either `str` or `std::ffi::CStr` - for the latter tl;dr is that out of range byte sequences like `c"\xc3\x28"` are perfectly valid but `cstr!("\xc3\x28")` won't compile because the string literal `"\xc3\x28"` itself is invalid.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
excellent, that all makes sense. thank you for the explanations and docs!