diff options
Diffstat (limited to 'lib/simplejson/encoder.py')
| -rw-r--r-- | lib/simplejson/encoder.py | 534 | 
1 files changed, 534 insertions, 0 deletions
| diff --git a/lib/simplejson/encoder.py b/lib/simplejson/encoder.py new file mode 100644 index 000000000..5ec7440f1 --- /dev/null +++ b/lib/simplejson/encoder.py @@ -0,0 +1,534 @@ +"""Implementation of JSONEncoder +""" +import re +from decimal import Decimal + +def _import_speedups(): +    try: +        from simplejson import _speedups +        return _speedups.encode_basestring_ascii, _speedups.make_encoder +    except ImportError: +        return None, None +c_encode_basestring_ascii, c_make_encoder = _import_speedups() + +from simplejson.decoder import PosInf + +ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') +ESCAPE_DCT = { +    '\\': '\\\\', +    '"': '\\"', +    '\b': '\\b', +    '\f': '\\f', +    '\n': '\\n', +    '\r': '\\r', +    '\t': '\\t', +    u'\u2028': '\\u2028', +    u'\u2029': '\\u2029', +} +for i in range(0x20): +    #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) +    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +FLOAT_REPR = repr + +def encode_basestring(s): +    """Return a JSON representation of a Python string + +    """ +    if isinstance(s, str) and HAS_UTF8.search(s) is not None: +        s = s.decode('utf-8') +    def replace(match): +        return ESCAPE_DCT[match.group(0)] +    return u'"' + ESCAPE.sub(replace, s) + u'"' + + +def py_encode_basestring_ascii(s): +    """Return an ASCII-only JSON representation of a Python string + +    """ +    if isinstance(s, str) and HAS_UTF8.search(s) is not None: +        s = s.decode('utf-8') +    def replace(match): +        s = match.group(0) +        try: +            return ESCAPE_DCT[s] +        except KeyError: +            n = ord(s) +            if n < 0x10000: +                #return '\\u{0:04x}'.format(n) +                return '\\u%04x' % (n,) +            else: +                # surrogate pair +                n -= 0x10000 +                s1 = 0xd800 | ((n >> 10) & 0x3ff) +                s2 = 0xdc00 | (n & 0x3ff) +                #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) +                return '\\u%04x\\u%04x' % (s1, s2) +    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +encode_basestring_ascii = ( +    c_encode_basestring_ascii or py_encode_basestring_ascii) + +class JSONEncoder(object): +    """Extensible JSON <http://json.org> encoder for Python data structures. + +    Supports the following objects and types by default: + +    +-------------------+---------------+ +    | Python            | JSON          | +    +===================+===============+ +    | dict, namedtuple  | object        | +    +-------------------+---------------+ +    | list, tuple       | array         | +    +-------------------+---------------+ +    | str, unicode      | string        | +    +-------------------+---------------+ +    | int, long, float  | number        | +    +-------------------+---------------+ +    | True              | true          | +    +-------------------+---------------+ +    | False             | false         | +    +-------------------+---------------+ +    | None              | null          | +    +-------------------+---------------+ + +    To extend this to recognize other objects, subclass and implement a +    ``.default()`` method with another method that returns a serializable +    object for ``o`` if possible, otherwise it should call the superclass +    implementation (to raise ``TypeError``). + +    """ +    item_separator = ', ' +    key_separator = ': ' +    def __init__(self, skipkeys=False, ensure_ascii=True, +            check_circular=True, allow_nan=True, sort_keys=False, +            indent=None, separators=None, encoding='utf-8', default=None, +            use_decimal=True, namedtuple_as_object=True, +            tuple_as_array=True): +        """Constructor for JSONEncoder, with sensible defaults. + +        If skipkeys is false, then it is a TypeError to attempt +        encoding of keys that are not str, int, long, float or None.  If +        skipkeys is True, such items are simply skipped. + +        If ensure_ascii is true, the output is guaranteed to be str +        objects with all incoming unicode characters escaped.  If +        ensure_ascii is false, the output will be unicode object. + +        If check_circular is true, then lists, dicts, and custom encoded +        objects will be checked for circular references during encoding to +        prevent an infinite recursion (which would cause an OverflowError). +        Otherwise, no such check takes place. + +        If allow_nan is true, then NaN, Infinity, and -Infinity will be +        encoded as such.  This behavior is not JSON specification compliant, +        but is consistent with most JavaScript based encoders and decoders. +        Otherwise, it will be a ValueError to encode such floats. + +        If sort_keys is true, then the output of dictionaries will be +        sorted by key; this is useful for regression tests to ensure +        that JSON serializations can be compared on a day-to-day basis. + +        If indent is a string, then JSON array elements and object members +        will be pretty-printed with a newline followed by that string repeated +        for each level of nesting. ``None`` (the default) selects the most compact +        representation without any newlines. For backwards compatibility with +        versions of simplejson earlier than 2.1.0, an integer is also accepted +        and is converted to a string with that many spaces. + +        If specified, separators should be a (item_separator, key_separator) +        tuple.  The default is (', ', ': ').  To get the most compact JSON +        representation you should specify (',', ':') to eliminate whitespace. + +        If specified, default is a function that gets called for objects +        that can't otherwise be serialized.  It should return a JSON encodable +        version of the object or raise a ``TypeError``. + +        If encoding is not None, then all input strings will be +        transformed into unicode using that encoding prior to JSON-encoding. +        The default is UTF-8. + +        If use_decimal is true (not the default), ``decimal.Decimal`` will +        be supported directly by the encoder. For the inverse, decode JSON +        with ``parse_float=decimal.Decimal``. + +        If namedtuple_as_object is true (the default), tuple subclasses with +        ``_asdict()`` methods will be encoded as JSON objects. +         +        If tuple_as_array is true (the default), tuple (and subclasses) will +        be encoded as JSON arrays. +        """ + +        self.skipkeys = skipkeys +        self.ensure_ascii = ensure_ascii +        self.check_circular = check_circular +        self.allow_nan = allow_nan +        self.sort_keys = sort_keys +        self.use_decimal = use_decimal +        self.namedtuple_as_object = namedtuple_as_object +        self.tuple_as_array = tuple_as_array +        if isinstance(indent, (int, long)): +            indent = ' ' * indent +        self.indent = indent +        if separators is not None: +            self.item_separator, self.key_separator = separators +        elif indent is not None: +            self.item_separator = ',' +        if default is not None: +            self.default = default +        self.encoding = encoding + +    def default(self, o): +        """Implement this method in a subclass such that it returns +        a serializable object for ``o``, or calls the base implementation +        (to raise a ``TypeError``). + +        For example, to support arbitrary iterators, you could +        implement default like this:: + +            def default(self, o): +                try: +                    iterable = iter(o) +                except TypeError: +                    pass +                else: +                    return list(iterable) +                return JSONEncoder.default(self, o) + +        """ +        raise TypeError(repr(o) + " is not JSON serializable") + +    def encode(self, o): +        """Return a JSON string representation of a Python data structure. + +        >>> from simplejson import JSONEncoder +        >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) +        '{"foo": ["bar", "baz"]}' + +        """ +        # This is for extremely simple cases and benchmarks. +        if isinstance(o, basestring): +            if isinstance(o, str): +                _encoding = self.encoding +                if (_encoding is not None +                        and not (_encoding == 'utf-8')): +                    o = o.decode(_encoding) +            if self.ensure_ascii: +                return encode_basestring_ascii(o) +            else: +                return encode_basestring(o) +        # This doesn't pass the iterator directly to ''.join() because the +        # exceptions aren't as detailed.  The list call should be roughly +        # equivalent to the PySequence_Fast that ''.join() would do. +        chunks = self.iterencode(o, _one_shot=True) +        if not isinstance(chunks, (list, tuple)): +            chunks = list(chunks) +        if self.ensure_ascii: +            return ''.join(chunks) +        else: +            return u''.join(chunks) + +    def iterencode(self, o, _one_shot=False): +        """Encode the given object and yield each string +        representation as available. + +        For example:: + +            for chunk in JSONEncoder().iterencode(bigobject): +                mysocket.write(chunk) + +        """ +        if self.check_circular: +            markers = {} +        else: +            markers = None +        if self.ensure_ascii: +            _encoder = encode_basestring_ascii +        else: +            _encoder = encode_basestring +        if self.encoding != 'utf-8': +            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): +                if isinstance(o, str): +                    o = o.decode(_encoding) +                return _orig_encoder(o) + +        def floatstr(o, allow_nan=self.allow_nan, +                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): +            # Check for specials. Note that this type of test is processor +            # and/or platform-specific, so do tests which don't depend on +            # the internals. + +            if o != o: +                text = 'NaN' +            elif o == _inf: +                text = 'Infinity' +            elif o == _neginf: +                text = '-Infinity' +            else: +                return _repr(o) + +            if not allow_nan: +                raise ValueError( +                    "Out of range float values are not JSON compliant: " + +                    repr(o)) + +            return text + + +        key_memo = {} +        if (_one_shot and c_make_encoder is not None +                and self.indent is None): +            _iterencode = c_make_encoder( +                markers, self.default, _encoder, self.indent, +                self.key_separator, self.item_separator, self.sort_keys, +                self.skipkeys, self.allow_nan, key_memo, self.use_decimal, +                self.namedtuple_as_object, self.tuple_as_array) +        else: +            _iterencode = _make_iterencode( +                markers, self.default, _encoder, self.indent, floatstr, +                self.key_separator, self.item_separator, self.sort_keys, +                self.skipkeys, _one_shot, self.use_decimal, +                self.namedtuple_as_object, self.tuple_as_array) +        try: +            return _iterencode(o, 0) +        finally: +            key_memo.clear() + + +class JSONEncoderForHTML(JSONEncoder): +    """An encoder that produces JSON safe to embed in HTML. + +    To embed JSON content in, say, a script tag on a web page, the +    characters &, < and > should be escaped. They cannot be escaped +    with the usual entities (e.g. &) because they are not expanded +    within <script> tags. +    """ + +    def encode(self, o): +        # Override JSONEncoder.encode because it has hacks for +        # performance that make things more complicated. +        chunks = self.iterencode(o, True) +        if self.ensure_ascii: +            return ''.join(chunks) +        else: +            return u''.join(chunks) + +    def iterencode(self, o, _one_shot=False): +        chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot) +        for chunk in chunks: +            chunk = chunk.replace('&', '\\u0026') +            chunk = chunk.replace('<', '\\u003c') +            chunk = chunk.replace('>', '\\u003e') +            yield chunk + + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, +        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, +        _use_decimal, _namedtuple_as_object, _tuple_as_array, +        ## HACK: hand-optimized bytecode; turn globals into locals +        False=False, +        True=True, +        ValueError=ValueError, +        basestring=basestring, +        Decimal=Decimal, +        dict=dict, +        float=float, +        id=id, +        int=int, +        isinstance=isinstance, +        list=list, +        long=long, +        str=str, +        tuple=tuple, +    ): + +    def _iterencode_list(lst, _current_indent_level): +        if not lst: +            yield '[]' +            return +        if markers is not None: +            markerid = id(lst) +            if markerid in markers: +                raise ValueError("Circular reference detected") +            markers[markerid] = lst +        buf = '[' +        if _indent is not None: +            _current_indent_level += 1 +            newline_indent = '\n' + (_indent * _current_indent_level) +            separator = _item_separator + newline_indent +            buf += newline_indent +        else: +            newline_indent = None +            separator = _item_separator +        first = True +        for value in lst: +            if first: +                first = False +            else: +                buf = separator +            if isinstance(value, basestring): +                yield buf + _encoder(value) +            elif value is None: +                yield buf + 'null' +            elif value is True: +                yield buf + 'true' +            elif value is False: +                yield buf + 'false' +            elif isinstance(value, (int, long)): +                yield buf + str(value) +            elif isinstance(value, float): +                yield buf + _floatstr(value) +            elif _use_decimal and isinstance(value, Decimal): +                yield buf + str(value) +            else: +                yield buf +                if isinstance(value, list): +                    chunks = _iterencode_list(value, _current_indent_level) +                elif (_namedtuple_as_object and isinstance(value, tuple) and +                        hasattr(value, '_asdict')): +                    chunks = _iterencode_dict(value._asdict(), +                                              _current_indent_level) +                elif _tuple_as_array and isinstance(value, tuple): +                    chunks = _iterencode_list(value, _current_indent_level) +                elif isinstance(value, dict): +                    chunks = _iterencode_dict(value, _current_indent_level) +                else: +                    chunks = _iterencode(value, _current_indent_level) +                for chunk in chunks: +                    yield chunk +        if newline_indent is not None: +            _current_indent_level -= 1 +            yield '\n' + (_indent * _current_indent_level) +        yield ']' +        if markers is not None: +            del markers[markerid] + +    def _iterencode_dict(dct, _current_indent_level): +        if not dct: +            yield '{}' +            return +        if markers is not None: +            markerid = id(dct) +            if markerid in markers: +                raise ValueError("Circular reference detected") +            markers[markerid] = dct +        yield '{' +        if _indent is not None: +            _current_indent_level += 1 +            newline_indent = '\n' + (_indent * _current_indent_level) +            item_separator = _item_separator + newline_indent +            yield newline_indent +        else: +            newline_indent = None +            item_separator = _item_separator +        first = True +        if _sort_keys: +            items = dct.items() +            items.sort(key=lambda kv: kv[0]) +        else: +            items = dct.iteritems() +        for key, value in items: +            if isinstance(key, basestring): +                pass +            # JavaScript is weakly typed for these, so it makes sense to +            # also allow them.  Many encoders seem to do something like this. +            elif isinstance(key, float): +                key = _floatstr(key) +            elif key is True: +                key = 'true' +            elif key is False: +                key = 'false' +            elif key is None: +                key = 'null' +            elif isinstance(key, (int, long)): +                key = str(key) +            elif _skipkeys: +                continue +            else: +                raise TypeError("key " + repr(key) + " is not a string") +            if first: +                first = False +            else: +                yield item_separator +            yield _encoder(key) +            yield _key_separator +            if isinstance(value, basestring): +                yield _encoder(value) +            elif value is None: +                yield 'null' +            elif value is True: +                yield 'true' +            elif value is False: +                yield 'false' +            elif isinstance(value, (int, long)): +                yield str(value) +            elif isinstance(value, float): +                yield _floatstr(value) +            elif _use_decimal and isinstance(value, Decimal): +                yield str(value) +            else: +                if isinstance(value, list): +                    chunks = _iterencode_list(value, _current_indent_level) +                elif (_namedtuple_as_object and isinstance(value, tuple) and +                        hasattr(value, '_asdict')): +                    chunks = _iterencode_dict(value._asdict(), +                                              _current_indent_level) +                elif _tuple_as_array and isinstance(value, tuple): +                    chunks = _iterencode_list(value, _current_indent_level) +                elif isinstance(value, dict): +                    chunks = _iterencode_dict(value, _current_indent_level) +                else: +                    chunks = _iterencode(value, _current_indent_level) +                for chunk in chunks: +                    yield chunk +        if newline_indent is not None: +            _current_indent_level -= 1 +            yield '\n' + (_indent * _current_indent_level) +        yield '}' +        if markers is not None: +            del markers[markerid] + +    def _iterencode(o, _current_indent_level): +        if isinstance(o, basestring): +            yield _encoder(o) +        elif o is None: +            yield 'null' +        elif o is True: +            yield 'true' +        elif o is False: +            yield 'false' +        elif isinstance(o, (int, long)): +            yield str(o) +        elif isinstance(o, float): +            yield _floatstr(o) +        elif isinstance(o, list): +            for chunk in _iterencode_list(o, _current_indent_level): +                yield chunk +        elif (_namedtuple_as_object and isinstance(o, tuple) and +                hasattr(o, '_asdict')): +            for chunk in _iterencode_dict(o._asdict(), _current_indent_level): +                yield chunk +        elif (_tuple_as_array and isinstance(o, tuple)): +            for chunk in _iterencode_list(o, _current_indent_level): +                yield chunk +        elif isinstance(o, dict): +            for chunk in _iterencode_dict(o, _current_indent_level): +                yield chunk +        elif _use_decimal and isinstance(o, Decimal): +            yield str(o) +        else: +            if markers is not None: +                markerid = id(o) +                if markerid in markers: +                    raise ValueError("Circular reference detected") +                markers[markerid] = o +            o = _default(o) +            for chunk in _iterencode(o, _current_indent_level): +                yield chunk +            if markers is not None: +                del markers[markerid] + +    return _iterencode | 
