From 7f78b62e2724a56d3dcac6f9e2c1a96da2901896 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Fri, 3 Mar 2023 09:43:11 -0500 Subject: [PATCH 1/8] SL-19314: WIP: First steps towards serializing to stream, not str. --- llsd/base.py | 22 +++++++++++++++++++--- llsd/serde_xml.py | 18 +++++++++--------- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 544f480..139ea1c 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -2,6 +2,7 @@ import base64 import binascii import datetime +import io import os import re import sys @@ -37,6 +38,8 @@ def __init__(self, thing=None): undef = _LLSD(None) +# 'binary' only exists so that a Python 2 caller can distinguish binary data +# from str data - since in Python 2, (bytes is str). if PY2: class binary(str): "Simple wrapper for llsd.binary data." @@ -187,18 +190,18 @@ def _str_to_bytes(s): return s -def _format_datestr(v): +def _write_datestr(stream, v): """ Formats a datetime or date object into the string format shared by xml and notation serializations. """ if not isinstance(v, datetime.date) and not isinstance(v, datetime.datetime): - raise LLSDParseError("invalid date string %s passed to date formatter" % v) + raise LLSDSerializationError("invalid date string %s passed to date formatter" % v) if not isinstance(v, datetime.datetime): v = datetime.datetime.combine(v, datetime.time(0)) - return _str_to_bytes(v.isoformat() + 'Z') + stream.write(_str_to_bytes(v.isoformat() + 'Z')) def _parse_datestr(datestr): @@ -366,6 +369,19 @@ def __init__(self): } + def format(self, something): + """ + Pure Python implementation of the formatter. + Format a python object according to the subclass's write() method. + + :param something: A python object (typically a dict) to be serialized. + :returns: A serialized bytes object. 
+ """ + stream = io.BytesIO() + self.write(stream, something) + return stream.getvalue() + + _X_ORD = ord(b'x') _BACKSLASH_ORD = ord(b'\\') _DECODE_BUFF_ALLOC_SIZE = 1024 diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index c8404a5..65b5ab6 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -111,18 +111,18 @@ def _generate(self, something): raise LLSDSerializationError( "Cannot serialize unknown type: %s (%s)" % (t, something)) - def _format(self, something): - "Pure Python implementation of the formatter." - return b'' + self._elt(b"llsd", self._generate(something)) - - def format(self, something): + def write(stream, something): """ - Format a python object as application/llsd+xml + Serialize a python object to the passed binary 'stream' as + application/llsd+xml. + :param stream: A binary file-like object to which to serialize 'something'. :param something: A python object (typically a dict) to be serialized. - :returns: Returns an XML formatted string. """ - return self._format(something) + stream.write(b'') + # ... mumble self._elt(b"llsd", self._generate(something)) + raise NotImplementedError('Watch This Space') + class LLSDXMLPrettyFormatter(LLSDXMLFormatter): """ @@ -263,4 +263,4 @@ def format_xml(something): global _g_xml_formatter if _g_xml_formatter is None: _g_xml_formatter = LLSDXMLFormatter() - return _g_xml_formatter.format(something) \ No newline at end of file + return _g_xml_formatter.format(something) From 2dc9497a0b9ca050f4d5e10e3373044521a04806 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Mon, 6 Mar 2023 13:30:30 -0500 Subject: [PATCH 2/8] SL-19314: Revert _format_datestr() to return a value. It turns out that _format_datestr() is always called in a context where we do want a value, rather than directly writing to a stream. 
--- llsd/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 139ea1c..8f9bf00 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -190,7 +190,7 @@ def _str_to_bytes(s): return s -def _write_datestr(stream, v): +def _format_datestr(v): """ Formats a datetime or date object into the string format shared by xml and notation serializations. @@ -201,7 +201,7 @@ def _write_datestr(stream, v): if not isinstance(v, datetime.datetime): v = datetime.datetime.combine(v, datetime.time(0)) - stream.write(_str_to_bytes(v.isoformat() + 'Z')) + return _str_to_bytes(v.isoformat() + 'Z') def _parse_datestr(datestr): From 99f19cb695221fab793fd19308ac9ac6e93030dd Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Mon, 6 Mar 2023 13:56:41 -0500 Subject: [PATCH 3/8] SL-19314: Recast LLSDXML[Pretty]Formatter to write to a stream. Replace format() method with a new write(stream, something) method that sets a new stream attribute on the formatter instance. Change (most) internal methods that used to return a bytes string to instead write to self.stream. This eliminates all serde_xml.py references to the B(...) hack. Introduce module-scope serde_xml.write_xml() and write_pretty_xml() functions to engage LLSDXMLFormatter.write() and LLSDXMLPrettyFormatter.write(), respectively. Since write_xml() and format_xml() both want to use module-scope _g_xml_formatter, extract its initialization to a new _get_xml_formatter() function. Enhance LLSDXMLFormatter._elt() method to accept a callable, and call it between the open and close element tags. 
This allows an easy migration path from _elt(b'name', self.method(something)) # method() returns a bytes string to _elt(b'name', lambda: self.method(something)) # method() writes to self.stream Removing LLSDXML[Pretty]Formatter.format() lets these classes inherit the new LLSDBaseFormatter.format() method, which passes an io.BytesIO instance to the subclass write() method and returns the contents of that BytesIO. Rename LLSDXMLPrettyFormatter.PRETTY_ARRAY and PRETTY_MAP to ARRAY and MAP, respectively, which lets LLSDBaseFormatter.__init__() find the right methods without having to patch self.type_map in the subclass constructor. --- llsd/serde_xml.py | 195 +++++++++++++++++++++++++++------------------- 1 file changed, 117 insertions(+), 78 deletions(-) diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index 65b5ab6..5294247 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -2,7 +2,8 @@ import re import types -from llsd.base import (_LLSD, ALL_CHARS, B, LLSDBaseFormatter, LLSDParseError, LLSDSerializationError, UnicodeType, +from llsd.base import (_LLSD, ALL_CHARS, LLSDBaseFormatter, LLSDParseError, + LLSDSerializationError, UnicodeType, _format_datestr, _str_to_bytes, _to_python, is_unicode) from llsd.fastest_elementtree import ElementTreeError, fromstring @@ -26,33 +27,44 @@ def remove_invalid_xml_bytes(b): class LLSDXMLFormatter(LLSDBaseFormatter): """ - Class which implements LLSD XML serialization.. + Class which implements LLSD XML serialization. http://wiki.secondlife.com/wiki/LLSD#XML_Serialization - This class wraps both a pure python and c-extension for formatting - a limited subset of python objects as application/llsd+xml. You do - not generally need to make an instance of this object since the - module level format_xml is the most convenient interface to this - functionality. + This class serializes a limited subset of python objects as + application/llsd+xml. 
You do not generally need to make an instance of + this class since the module level format_xml() is the most convenient + interface to this functionality. """ - __slots__ = [] + __slots__ = ['stream'] def _elt(self, name, contents=None): - "Serialize a single element." + """ + Serialize a single element. + + If 'contents' is omitted, write <name />. + If 'contents' is bytes, write <name>contents</name>. + If 'contents' is str, write <name>contents.encode('utf8')</name>. + If 'contents' is callable, write <name>, call contents(), write </name>. + """ if not contents: - return B("<%s />") % (name,) + self.stream.writelines([b"<", name, b" />"]) else: - return B("<%s>%s</%s>") % (name, _str_to_bytes(contents), name) + self.stream.writelines([b"<", name, b">"]) + if callable(contents): + contents() + else: + self.stream.write(_str_to_bytes(contents)) + self.stream.writelines([b"</", name, b">"]) def xml_esc(self, v): "Escape string or unicode object v for xml output" # Use is_unicode() instead of is_string() because in python 2, str is - # bytes, not unicode, and should not be "encode()"'d. attempts to + # bytes, not unicode, and should not be "encode()"d. Attempts to # encode("utf-8") a bytes type will result in an implicit # decode("ascii") that will throw a UnicodeDecodeError if the string - # contains non-ascii characters + # contains non-ascii characters.
if is_unicode(v): # we need to drop these invalid characters because they # cannot be parsed (and encode() doesn't drop them for us) @@ -91,18 +103,17 @@ def DATE(self, v): def ARRAY(self, v): return self._elt( b'array', - b''.join([self._generate(item) for item in v])) + lambda: [self._generate(item) for item in v]) def MAP(self, v): return self._elt( b'map', - b''.join([B("%s%s") % (self._elt(b'key', self.xml_esc(UnicodeType(key))), - self._generate(value)) - for key, value in v.items()])) + lambda: [(self._elt(b'key', self.xml_esc(UnicodeType(key))), + self._generate(value)) + for key, value in v.items()]) - typeof = type def _generate(self, something): "Generate xml from a single python object." - t = self.typeof(something) + t = type(something) if t in self.type_map: return self.type_map[t](something) elif isinstance(something, _LLSD): @@ -111,7 +122,7 @@ def _generate(self, something): raise LLSDSerializationError( "Cannot serialize unknown type: %s (%s)" % (t, something)) - def write(stream, something): + def write(self, stream, something): """ Serialize a python object to the passed binary 'stream' as application/llsd+xml. @@ -119,9 +130,12 @@ def write(stream, something): :param stream: A binary file-like object to which to serialize 'something'. :param something: A python object (typically a dict) to be serialized. """ - stream.write(b'') - # ... mumble self._elt(b"llsd", self._generate(something)) - raise NotImplementedError('Watch This Space') + self.stream = stream + try: + stream.write(b'') + self._elt(b"llsd", lambda: self._generate(something)) + finally: + self.stream = None class LLSDXMLPrettyFormatter(LLSDXMLFormatter): @@ -142,13 +156,6 @@ def __init__(self, indent_atom = None): # Call the super class constructor so that we have the type map super(LLSDXMLPrettyFormatter, self).__init__() - # Override the type map to use our specialized formatters to - # emit the pretty output. 
- self.type_map[list] = self.PRETTY_ARRAY - self.type_map[tuple] = self.PRETTY_ARRAY - self.type_map[types.GeneratorType] = self.PRETTY_ARRAY, - self.type_map[dict] = self.PRETTY_MAP - # Private data used for indentation. self._indent_level = 1 if indent_atom is None: @@ -157,54 +164,52 @@ def __init__(self, indent_atom = None): self._indent_atom = indent_atom def _indent(self): - "Return an indentation based on the atom and indentation level." - return self._indent_atom * self._indent_level + "Write an indentation based on the atom and indentation level." + self.stream.writelines([self._indent_atom] * self._indent_level) - def PRETTY_ARRAY(self, v): + def ARRAY(self, v): "Recursively format an array with pretty turned on." - rv = [] - rv.append(b'\n') - self._indent_level = self._indent_level + 1 - rv.extend([B("%s%s\n") % - (self._indent(), - self._generate(item)) - for item in v]) - self._indent_level = self._indent_level - 1 - rv.append(self._indent()) - rv.append(b'') - return b''.join(rv) - - def PRETTY_MAP(self, v): + self.stream.write(b'\n') + self._indent_level += 1 + for item in v: + self._indent() + self._generate(item) + self.stream.write(b'\n') + self._indent_level -= 1 + self._indent() + self.stream.write(b'') + + def MAP(self, v): "Recursively format a map with pretty turned on." 
- rv = [] - rv.append(b'\n') - self._indent_level = self._indent_level + 1 - # list of keys - keys = list(v) - keys.sort() - rv.extend([B("%s%s\n%s%s\n") % - (self._indent(), - self._elt(b'key', UnicodeType(key)), - self._indent(), - self._generate(v[key])) - for key in keys]) - self._indent_level = self._indent_level - 1 - rv.append(self._indent()) - rv.append(b'') - return b''.join(rv) - - def format(self, something): + self.stream.write(b'\n') + self._indent_level += 1 + # sorted list of keys + for key in sorted(v): + self._indent() + self._elt(b'key', UnicodeType(key)) + self.stream.write(b'\n') + self._indent() + self._generate(v[key]) + self.stream.write(b'\n') + self._indent_level -= 1 + self._indent() + self.stream.write(b'') + + def write(self, stream, something): """ - Format a python object as application/llsd+xml + Serialize to passed 'stream' the python object 'something' as 'pretty' + application/llsd+xml. + :param stream: a binary stream open for writing. :param something: a python object (typically a dict) to be serialized. - :returns: Returns an XML formatted string. """ - data = [] - data.append(b'\n') - data.append(self._generate(something)) - data.append(b'\n') - return b'\n'.join(data) + self.stream = stream + try: + stream.write(b'\n') + self._generate(something) + stream.write(b'\n') + finally: + self.stream = None def format_pretty_xml(something): @@ -226,6 +231,26 @@ def format_pretty_xml(something): return LLSDXMLPrettyFormatter().format(something) +def write_pretty_xml(stream, something): + """ + Serialize to passed 'stream' the python object 'something' as 'pretty' + application/llsd+xml. + + :param stream: a binary stream open for writing. + :param something: a python object (typically a dict) to be serialized. 
+ + See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization + + The output conforms to the LLSD DTD, unlike the output from the + standard python xml.dom DOM::toprettyxml() method which does not + preserve significant whitespace. + This function is not necessarily suited for serializing very large + objects. It sorts on dict (llsd map) keys alphabetically to ease human + reading. + """ + return LLSDXMLPrettyFormatter().write(stream, something) + + declaration_regex = re.compile(br'^\s*(?:<\?[\x09\x0A\x0D\x20-\x7e]+\?>)|(?:)') def validate_xml_declaration(something): if not declaration_regex.match(something): @@ -248,6 +273,13 @@ def parse_xml(something): _g_xml_formatter = None +def _get_xml_formatter(): + global _g_xml_formatter + if _g_xml_formatter is None: + _g_xml_formatter = LLSDXMLFormatter() + return _g_xml_formatter + + def format_xml(something): """ Format a python object as application/llsd+xml @@ -255,12 +287,19 @@ def format_xml(something): :param something: a python object (typically a dict) to be serialized. :returns: Returns an XML formatted string. - Ssee http://wiki.secondlife.com/wiki/LLSD#XML_Serialization + See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization + """ + return _get_xml_formatter().format(something) - This function wraps both a pure python and c-extension for formatting - a limited subset of python objects as application/llsd+xml. + +def write_xml(stream, something): """ - global _g_xml_formatter - if _g_xml_formatter is None: - _g_xml_formatter = LLSDXMLFormatter() - return _g_xml_formatter.format(something) + Serialize to passed 'stream' the python object 'something' as + application/llsd+xml. + + :param stream: a binary stream open for writing. + :param something: a python object (typically a dict) to be serialized. 
+ + See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization + """ + return _get_xml_formatter().write(stream, something) From dc753aff71fb003193f03b6a0b230ec2dcc28be3 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Mon, 6 Mar 2023 14:18:41 -0500 Subject: [PATCH 4/8] SL-19314: Move write() wrapper method to LLSDBaseFormatter. We want every subclass write() operation to set self.stream and clear it when done, so give that responsibility to the base-class write() method. write() calls subclass _write(), which then need only accept the 'something' to serialize. Update LLSDXMLFormatter and LLSDXMLPrettyFormatter accordingly. --- llsd/base.py | 19 ++++++++++++++++++- llsd/serde_xml.py | 32 +++++++++----------------------- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 8f9bf00..4b38cf4 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -344,8 +344,11 @@ class LLSDBaseFormatter(object): role of this base class is to provide self.type_map based on the methods defined in its subclass. """ + __slots__ = ['stream', 'type_map'] + def __init__(self): "Construct a new formatter dispatch table." + self.stream = None self.type_map = { type(None): self.UNDEF, undef: self.UNDEF, @@ -372,7 +375,7 @@ def __init__(self): def format(self, something): """ Pure Python implementation of the formatter. - Format a python object according to the subclass's write() method. + Format a python object according to subclass formatting. :param something: A python object (typically a dict) to be serialized. :returns: A serialized bytes object. @@ -381,6 +384,20 @@ def format(self, something): self.write(stream, something) return stream.getvalue() + def write(self, stream, something): + """ + Serialize a python object to the passed binary 'stream' according to + subclass formatting. + + :param stream: A binary file-like object to which to serialize 'something'. + :param something: A python object (typically a dict) to be serialized. 
+ """ + self.stream = stream + try: + return self._write(something) + finally: + self.stream = None + _X_ORD = ord(b'x') _BACKSLASH_ORD = ord(b'\\') diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index 5294247..ba8451f 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -36,8 +36,6 @@ class LLSDXMLFormatter(LLSDBaseFormatter): this class since the module level format_xml() is the most convenient interface to this functionality. """ - __slots__ = ['stream'] - def _elt(self, name, contents=None): """ Serialize a single element. @@ -122,20 +120,14 @@ def _generate(self, something): raise LLSDSerializationError( "Cannot serialize unknown type: %s (%s)" % (t, something)) - def write(self, stream, something): + def _write(self, something): """ - Serialize a python object to the passed binary 'stream' as - application/llsd+xml. + Serialize a python object to self.stream as application/llsd+xml. - :param stream: A binary file-like object to which to serialize 'something'. :param something: A python object (typically a dict) to be serialized. """ - self.stream = stream - try: - stream.write(b'') - self._elt(b"llsd", lambda: self._generate(something)) - finally: - self.stream = None + self.stream.write(b'') + self._elt(b"llsd", lambda: self._generate(something)) class LLSDXMLPrettyFormatter(LLSDXMLFormatter): @@ -195,21 +187,15 @@ def MAP(self, v): self._indent() self.stream.write(b'') - def write(self, stream, something): + def _write(self, something): """ - Serialize to passed 'stream' the python object 'something' as 'pretty' - application/llsd+xml. + Serialize a python object to self.stream as 'pretty' application/llsd+xml. - :param stream: a binary stream open for writing. :param something: a python object (typically a dict) to be serialized. 
""" - self.stream = stream - try: - stream.write(b'\n') - self._generate(something) - stream.write(b'\n') - finally: - self.stream = None + self.stream.write(b'\n') + self._generate(something) + self.stream.write(b'\n') def format_pretty_xml(something): From 2fce8eb04e7269a3d7da01b5458acf2cf04ac78c Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Mon, 6 Mar 2023 15:25:48 -0500 Subject: [PATCH 5/8] SL-19314: Recast LLSDNotationFormatter to write to passed stream. Add serde_notation.write_notation(stream, something) module-scope method to invoke LLSDNotationFormatter().write(stream, something). --- llsd/serde_notation.py | 80 +++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/llsd/serde_notation.py b/llsd/serde_notation.py index e2e9340..6abdaf7 100644 --- a/llsd/serde_notation.py +++ b/llsd/serde_notation.py @@ -3,8 +3,10 @@ import re import uuid -from llsd.base import (_LLSD, B, LLSDBaseFormatter, LLSDBaseParser, LLSDParseError, LLSDSerializationError, UnicodeType, - _format_datestr, _parse_datestr, _str_to_bytes, binary, uri, PY2, is_bytes, PY3SemanticBytes) +from llsd.base import (_LLSD, B, LLSDBaseFormatter, LLSDBaseParser, LLSDParseError, + LLSDSerializationError, UnicodeType, + _format_datestr, _parse_datestr, _str_to_bytes, binary, + uri, PY2, is_bytes, PY3SemanticBytes) _int_regex = re.compile(br"[-+]?\d+") _real_regex = re.compile(br"[-+]?(?:(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?)|[-+]?inf|[-+]?nan") @@ -333,21 +335,16 @@ class LLSDNotationFormatter(LLSDBaseFormatter): See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization """ - __slots__ = [] - def LLSD(self, v): return self._generate(v.thing) def UNDEF(self, v): - return b'!' 
+ self.stream.write(b'!') def BOOLEAN(self, v): - if v: - return b'true' - else: - return b'false' + self.stream.write(b'true' if v else b'false') def INTEGER(self, v): - return B("i%d") % v + self.stream.write(B("i%d") % v) def REAL(self, v): - return B("r%r") % v + self.stream.write(B("r%r") % v) def UUID(self, v): # latin-1 is the byte-to-byte encoding, mapping \x00-\xFF -> # \u0000-\u00FF. It's also the fastest encoding, I believe, from @@ -357,24 +354,42 @@ def UUID(self, v): # error behavior in case someone passes an invalid hex string, with # things other than 0-9a-fA-F, so that they will fail in the UUID # decode, rather than with a UnicodeError. - return B("u%s") % str(v).encode('latin-1') + self.stream.writelines([b"u", str(v).encode('latin-1')]) def BINARY(self, v): - return b'b64"' + base64.b64encode(v).strip() + b'"' + self.stream.writelines([b'b64"', base64.b64encode(v).strip(), b'"']) def STRING(self, v): - return B("'%s'") % _str_to_bytes(v).replace(b"\\", b"\\\\").replace(b"'", b"\\'") + self.stream.writelines([b"'", self._esc(v), b"'"]) def URI(self, v): - return B('l"%s"') % _str_to_bytes(v).replace(b"\\", b"\\\\").replace(b'"', b'\\"') + self.stream.writelines([b'l"', self._esc(v, b'"'), b'"']) def DATE(self, v): - return B('d"%s"') % _format_datestr(v) + self.stream.writelines([b'd"', _format_datestr(v), b'"']) def ARRAY(self, v): - return B("[%s]") % b','.join([self._generate(item) for item in v]) + self.stream.write(b'[') + delim = b'' + for item in v: + self.stream.write(delim) + self._generate(item) + delim = b',' + self.stream.write(b']') def MAP(self, v): - return B("{%s}") % b','.join([B("'%s':%s") % (_str_to_bytes(UnicodeType(key)).replace(b"\\", b"\\\\").replace(b"'", b"\\'"), self._generate(value)) - for key, value in v.items()]) + self.stream.write(b'{') + delim = b'' + for key, value in v.items(): + self.stream.writelines([delim, b"'", self._esc(UnicodeType(key)), b"':"]) + self._generate(value) + delim = b',' + 
self.stream.write(b'}') + + def _esc(self, data, quote=b"'"): + return _str_to_bytes(data).replace(b"\\", b"\\\\").replace(quote, b'\\'+quote) def _generate(self, something): - "Generate notation from a single python object." + """ + Serialize a python object to self.stream as application/llsd+notation + + :param something: a python object (typically a dict) to be serialized. + """ t = type(something) handler = self.type_map.get(t) if handler: @@ -388,14 +403,8 @@ def _generate(self, something): raise LLSDSerializationError( "Cannot serialize unknown type: %s (%s)" % (t, something)) - def format(self, something): - """ - Format a python object as application/llsd+notation - - :param something: a python object (typically a dict) to be serialized. - :returns: Returns a LLSD notation formatted string. - """ - return self._generate(something) + # _write() method is an alias for _generate() + _write = _generate def format_notation(something): @@ -410,6 +419,19 @@ def format_notation(something): return LLSDNotationFormatter().format(something) +def write_notation(stream, something): + """ + Serialize to passed binary 'stream' a python object 'something' as + application/llsd+notation. + + :param stream: a binary stream open for writing. + :param something: a python object (typically a dict) to be serialized. + + See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization + """ + return LLSDNotationFormatter().write(stream, something) + + def parse_notation(something): """ This is the basic public interface for parsing llsd+notation. @@ -417,4 +439,4 @@ def parse_notation(something): :param something: The data to parse. :returns: Returns a python object. 
""" - return LLSDNotationParser().parse(something) \ No newline at end of file + return LLSDNotationParser().parse(something) From 65c047b430fc9ef8397a191477f29bf6327f6a58 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Mon, 6 Mar 2023 15:56:04 -0500 Subject: [PATCH 6/8] SL-19314: Recast serde_binary.format_binary() to write to stream. That is, add serde_binary.write_binary(stream, something) and make format_binary() call it with an internal io.BytesIO instance. _format_binary_recurse(something) => _write_binary_recurse(stream, something) that writes serialized 'something' to the passed stream. --- llsd/serde_binary.py | 60 +++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py index cbf65e4..931a112 100644 --- a/llsd/serde_binary.py +++ b/llsd/serde_binary.py @@ -1,9 +1,11 @@ import calendar import datetime +import io import struct import uuid -from llsd.base import (_LLSD, LLSDBaseParser, LLSDSerializationError, _str_to_bytes, binary, is_integer, is_string, +from llsd.base import (_LLSD, LLSDBaseParser, LLSDSerializationError, + _str_to_bytes, binary, is_integer, is_string, starts_with, uri, PY2, is_bytes, PY3SemanticBytes) @@ -157,65 +159,65 @@ def format_binary(something): :param something: a python object (typically a dict) to be serialized. :returns: Returns a LLSD binary formatted string. """ - return b'\n' + _format_binary_recurse(something) + stream = io.BytesIO() + write_binary(stream, something) + return stream.getvalue() -def _format_binary_recurse(something): +def write_binary(stream, something): + stream.write(b'\n') + _write_binary_recurse(stream, something) + + +def _write_binary_recurse(stream, something): "Binary formatter workhorse." 
def _format_list(something): - array_builder = [] - array_builder.append(b'[' + struct.pack('!i', len(something))) + stream.writelines([b'[', struct.pack('!i', len(something))]) for item in something: - array_builder.append(_format_binary_recurse(item)) - array_builder.append(b']') - return b''.join(array_builder) + _write_binary_recurse(stream, item) + stream.write(b']') if something is None: - return b'!' + stream.write(b'!') elif isinstance(something, _LLSD): - return _format_binary_recurse(something.thing) + _write_binary_recurse(stream, something.thing) elif isinstance(something, bool): - if something: - return b'1' - else: - return b'0' + stream.write(b'1' if something else b'0') elif is_integer(something): try: - return b'i' + struct.pack('!i', something) + stream.writelines([b'i', struct.pack('!i', something)]) except (OverflowError, struct.error) as exc: raise LLSDSerializationError(str(exc), something) elif isinstance(something, float): try: - return b'r' + struct.pack('!d', something) + stream.writelines([b'r', struct.pack('!d', something)]) except SystemError as exc: raise LLSDSerializationError(str(exc), something) elif isinstance(something, uuid.UUID): - return b'u' + something.bytes + stream.writelines([b'u', something.bytes]) elif isinstance(something, binary): - return b'b' + struct.pack('!i', len(something)) + something + stream.writelines([b'b', struct.pack('!i', len(something)), something]) elif is_string(something): something = _str_to_bytes(something) - return b's' + struct.pack('!i', len(something)) + something + stream.writelines([b's', struct.pack('!i', len(something)), something]) elif isinstance(something, uri): - return b'l' + struct.pack('!i', len(something)) + something + stream.writelines([b'l', struct.pack('!i', len(something)), something]) elif isinstance(something, datetime.datetime): seconds_since_epoch = calendar.timegm(something.utctimetuple()) \ + something.microsecond // 1e6 - return b'd' + struct.pack(' Date: Tue, 7 Mar 2023 
14:47:04 -0500 Subject: [PATCH 7/8] SL-19314: Hoist write_{binary,notation,xml,pretty_xml} into llsd. That is, publish the new write_binary(), write_notation(), write_xml() and write_pretty_xml() functions to package scope. --- llsd/__init__.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/llsd/__init__.py b/llsd/__init__.py index 85c3471..3e09a60 100644 --- a/llsd/__init__.py +++ b/llsd/__init__.py @@ -7,11 +7,14 @@ http://wiki.secondlife.com/wiki/LLSD """ -from llsd.base import (_LLSD, BINARY_MIME_TYPE, NOTATION_MIME_TYPE, XML_MIME_TYPE, LLSDParseError, - LLSDSerializationError, LongType, UnicodeType, binary, starts_with, undef, uri) -from llsd.serde_binary import LLSDBinaryParser, format_binary, parse_binary -from llsd.serde_notation import LLSDNotationFormatter, LLSDNotationParser, format_notation, parse_notation -from llsd.serde_xml import LLSDXMLFormatter, LLSDXMLPrettyFormatter, format_pretty_xml, format_xml, parse_xml +from llsd.base import (_LLSD, BINARY_MIME_TYPE, NOTATION_MIME_TYPE, XML_MIME_TYPE, + LLSDParseError, LLSDSerializationError, LongType, UnicodeType, + binary, starts_with, undef, uri) +from llsd.serde_binary import LLSDBinaryParser, format_binary, write_binary, parse_binary +from llsd.serde_notation import (LLSDNotationFormatter, LLSDNotationParser, + format_notation, write_notation, parse_notation) +from llsd.serde_xml import (LLSDXMLFormatter, LLSDXMLPrettyFormatter, parse_xml, + format_pretty_xml, write_pretty_xml, format_xml, write_xml) def parse(something, mime_type = None): From ca0650a2bbdc3957fe9c2a4c40876de588666116 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Thu, 16 Mar 2023 10:50:07 -0400 Subject: [PATCH 8/8] SL-19314: Address code review comments. Remove serde_xml._g_xml_formatter and its getter function: now that LLSDBaseFormatter stores a stream, sharing a global LLSDXMLFormatter instance is Wrong. Every call to format_xml() and write_xml() now gets a distinct instance. 
Rename LLSDBaseFormatter type-specific methods with leading underscore: they were never intended for public consumption. Moreover, since they all now return None, any hypothetical external caller will break. Better to break with AttributeError than being silently, disastrously wrong. Extract nested _format_list() function from serde_binary._write_binary_recurse() to module-scope _write_list(), explicitly accepting 'stream' parameter instead of binding it from enclosing scope. Eliminate duplicate serde_notation.write_notation(), evidently a git merge glitch. --- llsd/base.py | 38 +++++++++++++++++++------------------- llsd/serde_binary.py | 17 +++++++++-------- llsd/serde_notation.py | 41 ++++++++++++++--------------------------- llsd/serde_xml.py | 40 ++++++++++++++++------------------------ 4 files changed, 58 insertions(+), 78 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 50056a6..6094e90 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -330,25 +330,25 @@ def __init__(self): "Construct a new formatter dispatch table." 
self.stream = None self.type_map = { - type(None): self.UNDEF, - undef: self.UNDEF, - bool: self.BOOLEAN, - int: self.INTEGER, - LongType: self.INTEGER, - float: self.REAL, - uuid.UUID: self.UUID, - binary: self.BINARY, - str: self.STRING, - UnicodeType: self.STRING, - newstr: self.STRING, - uri: self.URI, - datetime.datetime: self.DATE, - datetime.date: self.DATE, - list: self.ARRAY, - tuple: self.ARRAY, - types.GeneratorType: self.ARRAY, - dict: self.MAP, - _LLSD: self.LLSD, + type(None): self._UNDEF, + undef: self._UNDEF, + bool: self._BOOLEAN, + int: self._INTEGER, + LongType: self._INTEGER, + float: self._REAL, + uuid.UUID: self._UUID, + binary: self._BINARY, + str: self._STRING, + UnicodeType: self._STRING, + newstr: self._STRING, + uri: self._URI, + datetime.datetime: self._DATE, + datetime.date: self._DATE, + list: self._ARRAY, + tuple: self._ARRAY, + types.GeneratorType: self._ARRAY, + dict: self._MAP, + _LLSD: self._LLSD, } diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py index a73e5d3..673a246 100644 --- a/llsd/serde_binary.py +++ b/llsd/serde_binary.py @@ -165,12 +165,6 @@ def write_binary(stream, something): def _write_binary_recurse(stream, something): "Binary formatter workhorse." - def _format_list(something): - stream.writelines([b'[', struct.pack('!i', len(something))]) - for item in something: - _write_binary_recurse(stream, item) - stream.write(b']') - if something is None: stream.write(b'!') elif isinstance(something, _LLSD): @@ -204,7 +198,7 @@ def _format_list(something): seconds_since_epoch = calendar.timegm(something.timetuple()) stream.writelines([b'd', struct.pack(' # \u0000-\u00FF. It's also the fastest encoding, I believe, from # https://docs.python.org/3/library/codecs.html#encodings-and-unicode @@ -366,16 +366,16 @@ def UUID(self, v): # things other than 0-9a-fA-F, so that they will fail in the UUID # decode, rather than with a UnicodeError. 
self.stream.writelines([b"u", str(v).encode('latin-1')]) - def BINARY(self, v): + def _BINARY(self, v): self.stream.writelines([b'b64"', base64.b64encode(v).strip(), b'"']) - def STRING(self, v): + def _STRING(self, v): self.stream.writelines([b"'", self._esc(v), b"'"]) - def URI(self, v): + def _URI(self, v): self.stream.writelines([b'l"', self._esc(v, b'"'), b'"']) - def DATE(self, v): + def _DATE(self, v): self.stream.writelines([b'd"', _format_datestr(v), b'"']) - def ARRAY(self, v): + def _ARRAY(self, v): self.stream.write(b'[') delim = b'' for item in v: @@ -383,7 +383,7 @@ def ARRAY(self, v): self._generate(item) delim = b',' self.stream.write(b']') - def MAP(self, v): + def _MAP(self, v): self.stream.write(b'{') delim = b'' for key, value in v.items(): @@ -409,7 +409,7 @@ def _generate(self, something): return self.type_map[_LLSD](something) else: try: - return self.ARRAY(iter(something)) + return self._ARRAY(iter(something)) except TypeError: raise LLSDSerializationError( "Cannot serialize unknown type: %s (%s)" % (t, something)) @@ -443,19 +443,6 @@ def write_notation(stream, something): return LLSDNotationFormatter().write(stream, something) -def write_notation(stream, something): - """ - Serialize to passed binary 'stream' a python object 'something' as - application/llsd+notation. - - :param stream: a binary stream open for writing. - :param something: a python object (typically a dict) to be serialized. - - See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization - """ - return LLSDNotationFormatter().write(stream, something) - - def parse_notation(something): """ This is the basic public interface for parsing llsd+notation. 
diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index 2cd8f11..2f048a0 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -72,37 +72,37 @@ def xml_esc(self, v): v = remove_invalid_xml_bytes(v) return v.replace(b'&',b'&').replace(b'<',b'<').replace(b'>',b'>') - def LLSD(self, v): + def _LLSD(self, v): return self._generate(v.thing) - def UNDEF(self, _v): + def _UNDEF(self, _v): return self._elt(b'undef') - def BOOLEAN(self, v): + def _BOOLEAN(self, v): if v: return self._elt(b'boolean', b'true') else: return self._elt(b'boolean', b'false') - def INTEGER(self, v): + def _INTEGER(self, v): return self._elt(b'integer', str(v)) - def REAL(self, v): + def _REAL(self, v): return self._elt(b'real', repr(v)) - def UUID(self, v): + def _UUID(self, v): if v.int == 0: return self._elt(b'uuid') else: return self._elt(b'uuid', str(v)) - def BINARY(self, v): + def _BINARY(self, v): return self._elt(b'binary', base64.b64encode(v).strip()) - def STRING(self, v): + def _STRING(self, v): return self._elt(b'string', self.xml_esc(v)) - def URI(self, v): + def _URI(self, v): return self._elt(b'uri', self.xml_esc(str(v))) - def DATE(self, v): + def _DATE(self, v): return self._elt(b'date', _format_datestr(v)) - def ARRAY(self, v): + def _ARRAY(self, v): return self._elt( b'array', lambda: [self._generate(item) for item in v]) - def MAP(self, v): + def _MAP(self, v): return self._elt( b'map', lambda: [(self._elt(b'key', self.xml_esc(UnicodeType(key))), @@ -159,7 +159,7 @@ def _indent(self): "Write an indentation based on the atom and indentation level." self.stream.writelines([self._indent_atom] * self._indent_level) - def ARRAY(self, v): + def _ARRAY(self, v): "Recursively format an array with pretty turned on." self.stream.write(b'\n') self._indent_level += 1 @@ -171,7 +171,7 @@ def ARRAY(self, v): self._indent() self.stream.write(b'') - def MAP(self, v): + def _MAP(self, v): "Recursively format a map with pretty turned on." 
self.stream.write(b'\n') self._indent_level += 1 @@ -276,14 +276,6 @@ def parse_xml_nohdr(baseparser): return _to_python(element[0]) -_g_xml_formatter = None -def _get_xml_formatter(): - global _g_xml_formatter - if _g_xml_formatter is None: - _g_xml_formatter = LLSDXMLFormatter() - return _g_xml_formatter - - def format_xml(something): """ Format a python object as application/llsd+xml @@ -293,7 +285,7 @@ def format_xml(something): See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization """ - return _get_xml_formatter().format(something) + return LLSDXMLFormatter().format(something) def write_xml(stream, something): @@ -306,4 +298,4 @@ def write_xml(stream, something): See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization """ - return _get_xml_formatter().write(stream, something) + return LLSDXMLFormatter().write(stream, something)