diff --git a/llsd/__init__.py b/llsd/__init__.py index e247e62..f746ca2 100644 --- a/llsd/__init__.py +++ b/llsd/__init__.py @@ -11,9 +11,13 @@ BINARY_HEADER, NOTATION_HEADER, XML_HEADER, LLSDBaseParser, LLSDParseError, LLSDSerializationError, LongType, UnicodeType, binary, undef, uri) -from llsd.serde_binary import LLSDBinaryParser, format_binary, parse_binary, parse_binary_nohdr -from llsd.serde_notation import LLSDNotationFormatter, LLSDNotationParser, format_notation, parse_notation, parse_notation_nohdr -from llsd.serde_xml import LLSDXMLFormatter, LLSDXMLPrettyFormatter, format_pretty_xml, format_xml, parse_xml, parse_xml_nohdr +from llsd.serde_binary import (LLSDBinaryParser, format_binary, parse_binary, parse_binary_nohdr, + write_binary) +from llsd.serde_notation import (LLSDNotationFormatter, write_notation, format_notation, + LLSDNotationParser, parse_notation, parse_notation_nohdr) +from llsd.serde_xml import (LLSDXMLFormatter, LLSDXMLPrettyFormatter, + write_pretty_xml, write_xml, format_pretty_xml, format_xml, + parse_xml, parse_xml_nohdr) def parse(something, mime_type = None): diff --git a/llsd/base.py b/llsd/base.py index aaabb54..5cc5e7f 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -42,6 +42,8 @@ def __init__(self, thing=None): undef = _LLSD(None) +# 'binary' only exists so that a Python 2 caller can distinguish binary data +# from str data - since in Python 2, (bytes is str). if PY2: class binary(str): "Simple wrapper for llsd.binary data." @@ -174,7 +176,7 @@ def _format_datestr(v): xml and notation serializations. """ if not isinstance(v, datetime.date) and not isinstance(v, datetime.datetime): - raise LLSDParseError("invalid date string %s passed to date formatter" % v) + raise LLSDSerializationError("invalid date string %s passed to date formatter" % v) if not isinstance(v, datetime.datetime): v = datetime.datetime.combine(v, datetime.time(0)) @@ -322,31 +324,61 @@ class LLSDBaseFormatter(object): role of this base class is to provide self.type_map based on the methods defined in its subclass. """ + __slots__ = ['stream', 'type_map'] + def __init__(self): "Construct a new formatter dispatch table." + self.stream = None self.type_map = { - type(None): self.UNDEF, - undef: self.UNDEF, - bool: self.BOOLEAN, - int: self.INTEGER, - LongType: self.INTEGER, - float: self.REAL, - uuid.UUID: self.UUID, - binary: self.BINARY, - str: self.STRING, - UnicodeType: self.STRING, - newstr: self.STRING, - uri: self.URI, - datetime.datetime: self.DATE, - datetime.date: self.DATE, - list: self.ARRAY, - tuple: self.ARRAY, - types.GeneratorType: self.ARRAY, - dict: self.MAP, - _LLSD: self.LLSD, + type(None): self._UNDEF, + undef: self._UNDEF, + bool: self._BOOLEAN, + int: self._INTEGER, + LongType: self._INTEGER, + float: self._REAL, + uuid.UUID: self._UUID, + binary: self._BINARY, + str: self._STRING, + UnicodeType: self._STRING, + newstr: self._STRING, + uri: self._URI, + datetime.datetime: self._DATE, + datetime.date: self._DATE, + list: self._ARRAY, + tuple: self._ARRAY, + types.GeneratorType: self._ARRAY, + dict: self._MAP, + _LLSD: self._LLSD, } + def format(self, something): + """ + Pure Python implementation of the formatter. + Format a python object according to subclass formatting. + + :param something: A python object (typically a dict) to be serialized. + :returns: A serialized bytes object. + """ + stream = io.BytesIO() + self.write(stream, something) + return stream.getvalue() + + def write(self, stream, something): + """ + Serialize a python object to the passed binary 'stream' according to + subclass formatting. + + :param stream: A binary file-like object to which to serialize 'something'. + :param something: A python object (typically a dict) to be serialized. + """ + self.stream = stream + try: + return self._write(something) + finally: + self.stream = None + + _X_ORD = ord(b'x') _BACKSLASH_ORD = ord(b'\\') _DECODE_BUFF_ALLOC_SIZE = 1024 diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py index 42a2c0d..673a246 100644 --- a/llsd/serde_binary.py +++ b/llsd/serde_binary.py @@ -1,5 +1,6 @@ import calendar import datetime +import io import struct import uuid @@ -152,74 +153,75 @@ def format_binary(something): :param something: a python object (typically a dict) to be serialized. :returns: Returns a LLSD binary formatted string. """ - return BINARY_HEADER + b'\n' + _format_binary_recurse(something) + stream = io.BytesIO() + write_binary(stream, something) + return stream.getvalue() -def _format_binary_recurse(something): - "Binary formatter workhorse." - def _format_list(something): - array_builder = [] - array_builder.append(b'[' + struct.pack('!i', len(something))) - for item in something: - array_builder.append(_format_binary_recurse(item)) - array_builder.append(b']') - return b''.join(array_builder) +def write_binary(stream, something): + stream.write(b'\n') + _write_binary_recurse(stream, something) + +def _write_binary_recurse(stream, something): + "Binary formatter workhorse." if something is None: - return b'!' + stream.write(b'!') elif isinstance(something, _LLSD): - return _format_binary_recurse(something.thing) + _write_binary_recurse(stream, something.thing) elif isinstance(something, bool): - if something: - return b'1' - else: - return b'0' + stream.write(b'1' if something else b'0') elif is_integer(something): try: - return b'i' + struct.pack('!i', something) + stream.writelines([b'i', struct.pack('!i', something)]) except (OverflowError, struct.error) as exc: raise LLSDSerializationError(str(exc), something) elif isinstance(something, float): try: - return b'r' + struct.pack('!d', something) + stream.writelines([b'r', struct.pack('!d', something)]) except SystemError as exc: raise LLSDSerializationError(str(exc), something) elif isinstance(something, uuid.UUID): - return b'u' + something.bytes + stream.writelines([b'u', something.bytes]) elif isinstance(something, binary): - return b'b' + struct.pack('!i', len(something)) + something + stream.writelines([b'b', struct.pack('!i', len(something)), something]) elif is_string(something): something = _str_to_bytes(something) - return b's' + struct.pack('!i', len(something)) + something + stream.writelines([b's', struct.pack('!i', len(something)), something]) elif isinstance(something, uri): - return b'l' + struct.pack('!i', len(something)) + something + stream.writelines([b'l', struct.pack('!i', len(something)), something]) elif isinstance(something, datetime.datetime): seconds_since_epoch = calendar.timegm(something.utctimetuple()) \ + something.microsecond // 1e6 - return b'd' + struct.pack(' # \u0000-\u00FF. It's also the fastest encoding, I believe, from # https://docs.python.org/3/library/codecs.html#encodings-and-unicode @@ -370,24 +365,42 @@ def UUID(self, v): # error behavior in case someone passes an invalid hex string, with # things other than 0-9a-fA-F, so that they will fail in the UUID # decode, rather than with a UnicodeError. - return B("u%s") % str(v).encode('latin-1') - def BINARY(self, v): - return b'b64"' + base64.b64encode(v).strip() + b'"' - - def STRING(self, v): - return B("'%s'") % _str_to_bytes(v).replace(b"\\", b"\\\\").replace(b"'", b"\\'") - def URI(self, v): - return B('l"%s"') % _str_to_bytes(v).replace(b"\\", b"\\\\").replace(b'"', b'\\"') - def DATE(self, v): - return B('d"%s"') % _format_datestr(v) - def ARRAY(self, v): - return B("[%s]") % b','.join([self._generate(item) for item in v]) - def MAP(self, v): - return B("{%s}") % b','.join([B("'%s':%s") % (_str_to_bytes(UnicodeType(key)).replace(b"\\", b"\\\\").replace(b"'", b"\\'"), self._generate(value)) - for key, value in v.items()]) + self.stream.writelines([b"u", str(v).encode('latin-1')]) + def _BINARY(self, v): + self.stream.writelines([b'b64"', base64.b64encode(v).strip(), b'"']) + + def _STRING(self, v): + self.stream.writelines([b"'", self._esc(v), b"'"]) + def _URI(self, v): + self.stream.writelines([b'l"', self._esc(v, b'"'), b'"']) + def _DATE(self, v): + self.stream.writelines([b'd"', _format_datestr(v), b'"']) + def _ARRAY(self, v): + self.stream.write(b'[') + delim = b'' + for item in v: + self.stream.write(delim) + self._generate(item) + delim = b',' + self.stream.write(b']') + def _MAP(self, v): + self.stream.write(b'{') + delim = b'' + for key, value in v.items(): + self.stream.writelines([delim, b"'", self._esc(UnicodeType(key)), b"':"]) + self._generate(value) + delim = b',' + self.stream.write(b'}') + + def _esc(self, data, quote=b"'"): + return _str_to_bytes(data).replace(b"\\", b"\\\\").replace(quote, b'\\'+quote) def _generate(self, something): - "Generate notation from a single python object." + """ + Serialize a python object to self.stream as application/llsd+notation + + :param something: a python object (typically a dict) to be serialized. + """ t = type(something) handler = self.type_map.get(t) if handler: @@ -396,19 +409,13 @@ def _generate(self, something): return self.type_map[_LLSD](something) else: try: - return self.ARRAY(iter(something)) + return self._ARRAY(iter(something)) except TypeError: raise LLSDSerializationError( "Cannot serialize unknown type: %s (%s)" % (t, something)) - def format(self, something): - """ - Format a python object as application/llsd+notation - - :param something: a python object (typically a dict) to be serialized. - :returns: Returns a LLSD notation formatted string. - """ - return self._generate(something) + # _write() method is an alias for _generate() + _write = _generate def format_notation(something): @@ -423,6 +430,19 @@ def format_notation(something): return LLSDNotationFormatter().format(something) +def write_notation(stream, something): + """ + Serialize to passed binary 'stream' a python object 'something' as + application/llsd+notation. + + :param stream: a binary stream open for writing. + :param something: a python object (typically a dict) to be serialized. + + See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization + """ + return LLSDNotationFormatter().write(stream, something) + + def parse_notation(something): """ This is the basic public interface for parsing llsd+notation. diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index 3e4fd8b..2f048a0 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -2,7 +2,7 @@ import re import types -from llsd.base import (_LLSD, ALL_CHARS, B, LLSDBaseParser, LLSDBaseFormatter, XML_HEADER, +from llsd.base import (_LLSD, ALL_CHARS, LLSDBaseParser, LLSDBaseFormatter, XML_HEADER, LLSDParseError, LLSDSerializationError, UnicodeType, _format_datestr, _str_to_bytes, _to_python, is_unicode) from llsd.fastest_elementtree import ElementTreeError, fromstring, parse as _parse @@ -27,33 +27,42 @@ def remove_invalid_xml_bytes(b): class LLSDXMLFormatter(LLSDBaseFormatter): """ - Class which implements LLSD XML serialization.. + Class which implements LLSD XML serialization. http://wiki.secondlife.com/wiki/LLSD#XML_Serialization - This class wraps both a pure python and c-extension for formatting - a limited subset of python objects as application/llsd+xml. You do - not generally need to make an instance of this object since the - module level format_xml is the most convenient interface to this - functionality. + This class serializes a limited subset of python objects as + application/llsd+xml. You do not generally need to make an instance of + this class since the module level format_xml() is the most convenient + interface to this functionality. """ - __slots__ = [] - def _elt(self, name, contents=None): - "Serialize a single element." + """ + Serialize a single element. + + If 'contents' is omitted, write . + If 'contents' is bytes, write contents. + If 'contents' is str, write contents.encode('utf8'). + If 'contents' is callable, write , call contents(), write . + """ if not contents: - return B("<%s />") % (name,) + self.stream.writelines([b"<", name, b" />"]) else: - return B("<%s>%s") % (name, _str_to_bytes(contents), name) + self.stream.writelines([b"<", name, b">"]) + if callable(contents): + contents() + else: + self.stream.write(_str_to_bytes(contents)) + self.stream.writelines([b""]) def xml_esc(self, v): "Escape string or unicode object v for xml output" # Use is_unicode() instead of is_string() because in python 2, str is - # bytes, not unicode, and should not be "encode()"'d. attempts to + # bytes, not unicode, and should not be "encode()"d. Attempts to # encode("utf-8") a bytes type will result in an implicit # decode("ascii") that will throw a UnicodeDecodeError if the string - # contains non-ascii characters + # contains non-ascii characters. if is_unicode(v): # we need to drop these invalid characters because they # cannot be parsed (and encode() doesn't drop them for us) @@ -63,47 +72,46 @@ def xml_esc(self, v): v = remove_invalid_xml_bytes(v) return v.replace(b'&',b'&').replace(b'<',b'<').replace(b'>',b'>') - def LLSD(self, v): + def _LLSD(self, v): return self._generate(v.thing) - def UNDEF(self, _v): + def _UNDEF(self, _v): return self._elt(b'undef') - def BOOLEAN(self, v): + def _BOOLEAN(self, v): if v: return self._elt(b'boolean', b'true') else: return self._elt(b'boolean', b'false') - def INTEGER(self, v): + def _INTEGER(self, v): return self._elt(b'integer', str(v)) - def REAL(self, v): + def _REAL(self, v): return self._elt(b'real', repr(v)) - def UUID(self, v): + def _UUID(self, v): if v.int == 0: return self._elt(b'uuid') else: return self._elt(b'uuid', str(v)) - def BINARY(self, v): + def _BINARY(self, v): return self._elt(b'binary', base64.b64encode(v).strip()) - def STRING(self, v): + def _STRING(self, v): return self._elt(b'string', self.xml_esc(v)) - def URI(self, v): + def _URI(self, v): return self._elt(b'uri', self.xml_esc(str(v))) - def DATE(self, v): + def _DATE(self, v): return self._elt(b'date', _format_datestr(v)) - def ARRAY(self, v): + def _ARRAY(self, v): return self._elt( b'array', - b''.join([self._generate(item) for item in v])) - def MAP(self, v): + lambda: [self._generate(item) for item in v]) + def _MAP(self, v): return self._elt( b'map', - b''.join([B("%s%s") % (self._elt(b'key', self.xml_esc(UnicodeType(key))), - self._generate(value)) - for key, value in v.items()])) + lambda: [(self._elt(b'key', self.xml_esc(UnicodeType(key))), + self._generate(value)) + for key, value in v.items()]) - typeof = type def _generate(self, something): "Generate xml from a single python object." - t = self.typeof(something) + t = type(something) if t in self.type_map: return self.type_map[t](something) elif isinstance(something, _LLSD): @@ -112,18 +120,15 @@ def _generate(self, something): raise LLSDSerializationError( "Cannot serialize unknown type: %s (%s)" % (t, something)) - def _format(self, something): - "Pure Python implementation of the formatter." - return b'' + self._elt(b"llsd", self._generate(something)) - - def format(self, something): + def _write(self, something): """ - Format a python object as application/llsd+xml + Serialize a python object to self.stream as application/llsd+xml. :param something: A python object (typically a dict) to be serialized. - :returns: Returns an XML formatted string. """ - return self._format(something) + self.stream.write(b'') + self._elt(b"llsd", lambda: self._generate(something)) + class LLSDXMLPrettyFormatter(LLSDXMLFormatter): """ @@ -143,13 +148,6 @@ def __init__(self, indent_atom = None): # Call the super class constructor so that we have the type map super(LLSDXMLPrettyFormatter, self).__init__() - # Override the type map to use our specialized formatters to - # emit the pretty output. - self.type_map[list] = self.PRETTY_ARRAY - self.type_map[tuple] = self.PRETTY_ARRAY - self.type_map[types.GeneratorType] = self.PRETTY_ARRAY, - self.type_map[dict] = self.PRETTY_MAP - # Private data used for indentation. self._indent_level = 1 if indent_atom is None: @@ -158,54 +156,46 @@ def __init__(self, indent_atom = None): self._indent_atom = indent_atom def _indent(self): - "Return an indentation based on the atom and indentation level." - return self._indent_atom * self._indent_level + "Write an indentation based on the atom and indentation level." + self.stream.writelines([self._indent_atom] * self._indent_level) - def PRETTY_ARRAY(self, v): + def _ARRAY(self, v): "Recursively format an array with pretty turned on." - rv = [] - rv.append(b'\n') - self._indent_level = self._indent_level + 1 - rv.extend([B("%s%s\n") % - (self._indent(), - self._generate(item)) - for item in v]) - self._indent_level = self._indent_level - 1 - rv.append(self._indent()) - rv.append(b'') - return b''.join(rv) - - def PRETTY_MAP(self, v): + self.stream.write(b'\n') + self._indent_level += 1 + for item in v: + self._indent() + self._generate(item) + self.stream.write(b'\n') + self._indent_level -= 1 + self._indent() + self.stream.write(b'') + + def _MAP(self, v): "Recursively format a map with pretty turned on." - rv = [] - rv.append(b'\n') - self._indent_level = self._indent_level + 1 - # list of keys - keys = list(v) - keys.sort() - rv.extend([B("%s%s\n%s%s\n") % - (self._indent(), - self._elt(b'key', UnicodeType(key)), - self._indent(), - self._generate(v[key])) - for key in keys]) - self._indent_level = self._indent_level - 1 - rv.append(self._indent()) - rv.append(b'') - return b''.join(rv) - - def format(self, something): + self.stream.write(b'\n') + self._indent_level += 1 + # sorted list of keys + for key in sorted(v): + self._indent() + self._elt(b'key', UnicodeType(key)) + self.stream.write(b'\n') + self._indent() + self._generate(v[key]) + self.stream.write(b'\n') + self._indent_level -= 1 + self._indent() + self.stream.write(b'') + + def _write(self, something): """ - Format a python object as application/llsd+xml + Serialize a python object to self.stream as 'pretty' application/llsd+xml. :param something: a python object (typically a dict) to be serialized. - :returns: Returns an XML formatted string. """ - data = [] - data.append(b'\n') - data.append(self._generate(something)) - data.append(b'\n') - return b'\n'.join(data) + self.stream.write(b'\n') + self._generate(something) + self.stream.write(b'\n') def format_pretty_xml(something): @@ -227,6 +217,26 @@ def format_pretty_xml(something): return LLSDXMLPrettyFormatter().format(something) +def write_pretty_xml(stream, something): + """ + Serialize to passed 'stream' the python object 'something' as 'pretty' + application/llsd+xml. + + :param stream: a binary stream open for writing. + :param something: a python object (typically a dict) to be serialized. + + See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization + + The output conforms to the LLSD DTD, unlike the output from the + standard python xml.dom DOM::toprettyxml() method which does not + preserve significant whitespace. + This function is not necessarily suited for serializing very large + objects. It sorts on dict (llsd map) keys alphabetically to ease human + reading. + """ + return LLSDXMLPrettyFormatter().write(stream, something) + + def parse_xml(something): """ This is the basic public interface for parsing llsd+xml. @@ -266,7 +276,6 @@ def parse_xml_nohdr(baseparser): return _to_python(element[0]) -_g_xml_formatter = None def format_xml(something): """ Format a python object as application/llsd+xml @@ -274,12 +283,19 @@ def format_xml(something): :param something: a python object (typically a dict) to be serialized. :returns: Returns an XML formatted string. - Ssee http://wiki.secondlife.com/wiki/LLSD#XML_Serialization + See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization + """ + return LLSDXMLFormatter().format(something) + + +def write_xml(stream, something): + """ + Serialize to passed 'stream' the python object 'something' as + application/llsd+xml. - This function wraps both a pure python and c-extension for formatting - a limited subset of python objects as application/llsd+xml. + :param stream: a binary stream open for writing. + :param something: a python object (typically a dict) to be serialized. + + See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization """ - global _g_xml_formatter - if _g_xml_formatter is None: - _g_xml_formatter = LLSDXMLFormatter() - return _g_xml_formatter.format(something) + return LLSDXMLFormatter().write(stream, something)