diff options
Diffstat (limited to 'lib/simplejson')
| -rw-r--r-- | lib/simplejson/__init__.py | 466 | ||||
| -rw-r--r-- | lib/simplejson/decoder.py | 421 | ||||
| -rw-r--r-- | lib/simplejson/encoder.py | 534 | ||||
| -rw-r--r-- | lib/simplejson/ordered_dict.py | 119 | ||||
| -rw-r--r-- | lib/simplejson/scanner.py | 77 | ||||
| -rw-r--r-- | lib/simplejson/tool.py | 39 | 
6 files changed, 1656 insertions, 0 deletions
| diff --git a/lib/simplejson/__init__.py b/lib/simplejson/__init__.py new file mode 100644 index 000000000..ef5c0db48 --- /dev/null +++ b/lib/simplejson/__init__.py @@ -0,0 +1,466 @@ +r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. + +Encoding basic Python object hierarchies:: + +    >>> import simplejson as json +    >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) +    '["foo", {"bar": ["baz", null, 1.0, 2]}]' +    >>> print json.dumps("\"foo\bar") +    "\"foo\bar" +    >>> print json.dumps(u'\u1234') +    "\u1234" +    >>> print json.dumps('\\') +    "\\" +    >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) +    {"a": 0, "b": 0, "c": 0} +    >>> from StringIO import StringIO +    >>> io = StringIO() +    >>> json.dump(['streaming API'], io) +    >>> io.getvalue() +    '["streaming API"]' + +Compact encoding:: + +    >>> import simplejson as json +    >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) +    '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + +    >>> import simplejson as json +    >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent='    ') +    >>> print '\n'.join([l.rstrip() for l in  s.splitlines()]) +    { +        "4": 5, +        "6": 7 +    } + +Decoding JSON:: + +    >>> import simplejson as json +    >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] +    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj +    True +    >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' +    True +    >>> from 
StringIO import StringIO +    >>> io = StringIO('["streaming API"]') +    >>> json.load(io)[0] == 'streaming API' +    True + +Specializing JSON object decoding:: + +    >>> import simplejson as json +    >>> def as_complex(dct): +    ...     if '__complex__' in dct: +    ...         return complex(dct['real'], dct['imag']) +    ...     return dct +    ... +    >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', +    ...     object_hook=as_complex) +    (1+2j) +    >>> from decimal import Decimal +    >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') +    True + +Specializing JSON object encoding:: + +    >>> import simplejson as json +    >>> def encode_complex(obj): +    ...     if isinstance(obj, complex): +    ...         return [obj.real, obj.imag] +    ...     raise TypeError(repr(obj) + " is not JSON serializable") +    ... +    >>> json.dumps(2 + 1j, default=encode_complex) +    '[2.0, 1.0]' +    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) +    '[2.0, 1.0]' +    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) +    '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: + +    $ echo '{"json":"obj"}' | python -m simplejson.tool +    { +        "json": "obj" +    } +    $ echo '{ 1.2:3.4}' | python -m simplejson.tool +    Expecting property name: line 1 column 2 (char 2) +""" +__version__ = '2.2.1' +__all__ = [ +    'dump', 'dumps', 'load', 'loads', +    'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', +    'OrderedDict', +] + +__author__ = 'Bob Ippolito <bob@redivi.com>' + +from decimal import Decimal + +from decoder import JSONDecoder, JSONDecodeError +from encoder import JSONEncoder +def _import_OrderedDict(): +    import collections +    try: +        return collections.OrderedDict +    except AttributeError: +        import ordered_dict +        return ordered_dict.OrderedDict +OrderedDict = _import_OrderedDict() + +def _import_c_make_encoder(): +    try: +        
from simplejson._speedups import make_encoder +        return make_encoder +    except ImportError: +        return None + +_default_encoder = JSONEncoder( +    skipkeys=False, +    ensure_ascii=True, +    check_circular=True, +    allow_nan=True, +    indent=None, +    separators=None, +    encoding='utf-8', +    default=None, +    use_decimal=True, +    namedtuple_as_object=True, +    tuple_as_array=True, +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, +        allow_nan=True, cls=None, indent=None, separators=None, +        encoding='utf-8', default=None, use_decimal=True, +        namedtuple_as_object=True, tuple_as_array=True, +        **kw): +    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a +    ``.write()``-supporting file-like object). + +    If ``skipkeys`` is true then ``dict`` keys that are not basic types +    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) +    will be skipped instead of raising a ``TypeError``. + +    If ``ensure_ascii`` is false, then some of the chunks written to ``fp`` +    may be ``unicode`` instances, subject to normal Python ``str`` to +    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly +    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely +    to cause an error. + +    If ``check_circular`` is false, then the circular reference check +    for container types will be skipped and a circular reference will +    result in an ``OverflowError`` (or worse). + +    If ``allow_nan`` is false, then it will be a ``ValueError`` to +    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) +    in strict compliance of the JSON specification, instead of using the +    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + +    If *indent* is a string, then JSON array elements and object members +    will be pretty-printed with a newline followed by that string repeated +    for each level of nesting. 
``None`` (the default) selects the most compact +    representation without any newlines. For backwards compatibility with +    versions of simplejson earlier than 2.1.0, an integer is also accepted +    and is converted to a string with that many spaces. + +    If ``separators`` is an ``(item_separator, dict_separator)`` tuple +    then it will be used instead of the default ``(', ', ': ')`` separators. +    ``(',', ':')`` is the most compact JSON representation. + +    ``encoding`` is the character encoding for str instances, default is UTF-8. + +    ``default(obj)`` is a function that should return a serializable version +    of obj or raise TypeError. The default simply raises TypeError. + +    If *use_decimal* is true (default: ``True``) then decimal.Decimal +    will be natively serialized to JSON with full precision. + +    If *namedtuple_as_object* is true (default: ``True``), +    :class:`tuple` subclasses with ``_asdict()`` methods will be encoded +    as JSON objects. +     +    If *tuple_as_array* is true (default: ``True``), +    :class:`tuple` (and subclasses) will be encoded as JSON arrays. + +    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the +    ``.default()`` method to serialize additional types), specify it with +    the ``cls`` kwarg. 
+ +    """ +    # cached encoder +    if (not skipkeys and ensure_ascii and +        check_circular and allow_nan and +        cls is None and indent is None and separators is None and +        encoding == 'utf-8' and default is None and use_decimal +        and namedtuple_as_object and tuple_as_array and not kw): +        iterable = _default_encoder.iterencode(obj) +    else: +        if cls is None: +            cls = JSONEncoder +        iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, +            check_circular=check_circular, allow_nan=allow_nan, indent=indent, +            separators=separators, encoding=encoding, +            default=default, use_decimal=use_decimal, +            namedtuple_as_object=namedtuple_as_object, +            tuple_as_array=tuple_as_array, +            **kw).iterencode(obj) +    # could accelerate with writelines in some versions of Python, at +    # a debuggability cost +    for chunk in iterable: +        fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, +        allow_nan=True, cls=None, indent=None, separators=None, +        encoding='utf-8', default=None, use_decimal=True, +        namedtuple_as_object=True, +        tuple_as_array=True, +        **kw): +    """Serialize ``obj`` to a JSON formatted ``str``. + +    If ``skipkeys`` is true then ``dict`` keys that are not basic types +    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) +    will be skipped instead of raising a ``TypeError``. + +    If ``ensure_ascii`` is false, then the return value will be a +    ``unicode`` instance subject to normal Python ``str`` to ``unicode`` +    coercion rules instead of being escaped to an ASCII ``str``. + +    If ``check_circular`` is false, then the circular reference check +    for container types will be skipped and a circular reference will +    result in an ``OverflowError`` (or worse). 
+ +    If ``allow_nan`` is false, then it will be a ``ValueError`` to +    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in +    strict compliance of the JSON specification, instead of using the +    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + +    If ``indent`` is a string, then JSON array elements and object members +    will be pretty-printed with a newline followed by that string repeated +    for each level of nesting. ``None`` (the default) selects the most compact +    representation without any newlines. For backwards compatibility with +    versions of simplejson earlier than 2.1.0, an integer is also accepted +    and is converted to a string with that many spaces. + +    If ``separators`` is an ``(item_separator, dict_separator)`` tuple +    then it will be used instead of the default ``(', ', ': ')`` separators. +    ``(',', ':')`` is the most compact JSON representation. + +    ``encoding`` is the character encoding for str instances, default is UTF-8. + +    ``default(obj)`` is a function that should return a serializable version +    of obj or raise TypeError. The default simply raises TypeError. + +    If *use_decimal* is true (default: ``True``) then decimal.Decimal +    will be natively serialized to JSON with full precision. + +    If *namedtuple_as_object* is true (default: ``True``), +    :class:`tuple` subclasses with ``_asdict()`` methods will be encoded +    as JSON objects. +     +    If *tuple_as_array* is true (default: ``True``), +    :class:`tuple` (and subclasses) will be encoded as JSON arrays. + +    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the +    ``.default()`` method to serialize additional types), specify it with +    the ``cls`` kwarg. 
+ +    """ +    # cached encoder +    if (not skipkeys and ensure_ascii and +        check_circular and allow_nan and +        cls is None and indent is None and separators is None and +        encoding == 'utf-8' and default is None and use_decimal +        and namedtuple_as_object and tuple_as_array and not kw): +        return _default_encoder.encode(obj) +    if cls is None: +        cls = JSONEncoder +    return cls( +        skipkeys=skipkeys, ensure_ascii=ensure_ascii, +        check_circular=check_circular, allow_nan=allow_nan, indent=indent, +        separators=separators, encoding=encoding, default=default, +        use_decimal=use_decimal, +        namedtuple_as_object=namedtuple_as_object, +        tuple_as_array=tuple_as_array, +        **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None, +                               object_pairs_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, +        parse_int=None, parse_constant=None, object_pairs_hook=None, +        use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, +        **kw): +    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing +    a JSON document) to a Python object. + +    *encoding* determines the encoding used to interpret any +    :class:`str` objects decoded by this instance (``'utf-8'`` by +    default).  It has no effect when decoding :class:`unicode` objects. + +    Note that currently only encodings that are a superset of ASCII work, +    strings of other encodings should be passed in as :class:`unicode`. + +    *object_hook*, if specified, will be called with the result of every +    JSON object decoded and its return value will be used in place of the +    given :class:`dict`.  This can be used to provide custom +    deserializations (e.g. to support JSON-RPC class hinting). 
+ +    *object_pairs_hook* is an optional function that will be called with +    the result of any object literal decode with an ordered list of pairs. +    The return value of *object_pairs_hook* will be used instead of the +    :class:`dict`.  This feature can be used to implement custom decoders +    that rely on the order that the key and value pairs are decoded (for +    example, :func:`collections.OrderedDict` will remember the order of +    insertion). If *object_hook* is also defined, the *object_pairs_hook* +    takes priority. + +    *parse_float*, if specified, will be called with the string of every +    JSON float to be decoded.  By default, this is equivalent to +    ``float(num_str)``. This can be used to use another datatype or parser +    for JSON floats (e.g. :class:`decimal.Decimal`). + +    *parse_int*, if specified, will be called with the string of every +    JSON int to be decoded.  By default, this is equivalent to +    ``int(num_str)``.  This can be used to use another datatype or parser +    for JSON integers (e.g. :class:`float`). + +    *parse_constant*, if specified, will be called with one of the +    following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This +    can be used to raise an exception if invalid JSON numbers are +    encountered. + +    If *use_decimal* is true (default: ``False``) then it implies +    parse_float=decimal.Decimal for parity with ``dump``. + +    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` +    kwarg. 
+ +    """ +    return loads(fp.read(), +        encoding=encoding, cls=cls, object_hook=object_hook, +        parse_float=parse_float, parse_int=parse_int, +        parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, +        use_decimal=use_decimal, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, +        parse_int=None, parse_constant=None, object_pairs_hook=None, +        use_decimal=False, **kw): +    """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON +    document) to a Python object. + +    *encoding* determines the encoding used to interpret any +    :class:`str` objects decoded by this instance (``'utf-8'`` by +    default).  It has no effect when decoding :class:`unicode` objects. + +    Note that currently only encodings that are a superset of ASCII work, +    strings of other encodings should be passed in as :class:`unicode`. + +    *object_hook*, if specified, will be called with the result of every +    JSON object decoded and its return value will be used in place of the +    given :class:`dict`.  This can be used to provide custom +    deserializations (e.g. to support JSON-RPC class hinting). + +    *object_pairs_hook* is an optional function that will be called with +    the result of any object literal decode with an ordered list of pairs. +    The return value of *object_pairs_hook* will be used instead of the +    :class:`dict`.  This feature can be used to implement custom decoders +    that rely on the order that the key and value pairs are decoded (for +    example, :func:`collections.OrderedDict` will remember the order of +    insertion). If *object_hook* is also defined, the *object_pairs_hook* +    takes priority. + +    *parse_float*, if specified, will be called with the string of every +    JSON float to be decoded.  By default, this is equivalent to +    ``float(num_str)``. This can be used to use another datatype or parser +    for JSON floats (e.g. 
:class:`decimal.Decimal`). + +    *parse_int*, if specified, will be called with the string of every +    JSON int to be decoded.  By default, this is equivalent to +    ``int(num_str)``.  This can be used to use another datatype or parser +    for JSON integers (e.g. :class:`float`). + +    *parse_constant*, if specified, will be called with one of the +    following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This +    can be used to raise an exception if invalid JSON numbers are +    encountered. + +    If *use_decimal* is true (default: ``False``) then it implies +    parse_float=decimal.Decimal for parity with ``dump``. + +    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` +    kwarg. + +    """ +    if (cls is None and encoding is None and object_hook is None and +            parse_int is None and parse_float is None and +            parse_constant is None and object_pairs_hook is None +            and not use_decimal and not kw): +        return _default_decoder.decode(s) +    if cls is None: +        cls = JSONDecoder +    if object_hook is not None: +        kw['object_hook'] = object_hook +    if object_pairs_hook is not None: +        kw['object_pairs_hook'] = object_pairs_hook +    if parse_float is not None: +        kw['parse_float'] = parse_float +    if parse_int is not None: +        kw['parse_int'] = parse_int +    if parse_constant is not None: +        kw['parse_constant'] = parse_constant +    if use_decimal: +        if parse_float is not None: +            raise TypeError("use_decimal=True implies parse_float=Decimal") +        kw['parse_float'] = Decimal +    return cls(encoding=encoding, **kw).decode(s) + + +def _toggle_speedups(enabled): +    import simplejson.decoder as dec +    import simplejson.encoder as enc +    import simplejson.scanner as scan +    c_make_encoder = _import_c_make_encoder() +    if enabled: +        dec.scanstring = dec.c_scanstring or dec.py_scanstring +        enc.c_make_encoder = 
c_make_encoder +        enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or  +            enc.py_encode_basestring_ascii) +        scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner +    else: +        dec.scanstring = dec.py_scanstring +        enc.c_make_encoder = None +        enc.encode_basestring_ascii = enc.py_encode_basestring_ascii +        scan.make_scanner = scan.py_make_scanner +    dec.make_scanner = scan.make_scanner +    global _default_decoder +    _default_decoder = JSONDecoder( +        encoding=None, +        object_hook=None, +        object_pairs_hook=None, +    ) +    global _default_encoder +    _default_encoder = JSONEncoder( +       skipkeys=False, +       ensure_ascii=True, +       check_circular=True, +       allow_nan=True, +       indent=None, +       separators=None, +       encoding='utf-8', +       default=None, +   ) diff --git a/lib/simplejson/decoder.py b/lib/simplejson/decoder.py new file mode 100644 index 000000000..e5496d6e7 --- /dev/null +++ b/lib/simplejson/decoder.py @@ -0,0 +1,421 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +def _import_c_scanstring(): +    try: +        from simplejson._speedups import scanstring +        return scanstring +    except ImportError: +        return None +c_scanstring = _import_c_scanstring() + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): +    _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') +    # The struct module in Python 2.4 would get frexp() out of range here +    # when an endian is specified in the format string. 
Fixed in Python 2.5+ +    if sys.byteorder != 'big': +        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] +    nan, inf = struct.unpack('dd', _BYTES) +    return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +class JSONDecodeError(ValueError): +    """Subclass of ValueError with the following additional properties: + +    msg: The unformatted error message +    doc: The JSON document being parsed +    pos: The start index of doc where parsing failed +    end: The end index of doc where parsing failed (may be None) +    lineno: The line corresponding to pos +    colno: The column corresponding to pos +    endlineno: The line corresponding to end (may be None) +    endcolno: The column corresponding to end (may be None) + +    """ +    def __init__(self, msg, doc, pos, end=None): +        ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) +        self.msg = msg +        self.doc = doc +        self.pos = pos +        self.end = end +        self.lineno, self.colno = linecol(doc, pos) +        if end is not None: +            self.endlineno, self.endcolno = linecol(doc, end) +        else: +            self.endlineno, self.endcolno = None, None + + +def linecol(doc, pos): +    lineno = doc.count('\n', 0, pos) + 1 +    if lineno == 1: +        colno = pos +    else: +        colno = pos - doc.rindex('\n', 0, pos) +    return lineno, colno + + +def errmsg(msg, doc, pos, end=None): +    # Note that this function is called from _speedups +    lineno, colno = linecol(doc, pos) +    if end is None: +        #fmt = '{0}: line {1} column {2} (char {3})' +        #return fmt.format(msg, lineno, colno, pos) +        fmt = '%s: line %d column %d (char %d)' +        return fmt % (msg, lineno, colno, pos) +    endlineno, endcolno = linecol(doc, end) +    #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' +    #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) +    fmt = '%s: line %d column %d - line %d column %d 
(char %d - %d)' +    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { +    '-Infinity': NegInf, +    'Infinity': PosInf, +    'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { +    '"': u'"', '\\': u'\\', '/': u'/', +    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, +        _b=BACKSLASH, _m=STRINGCHUNK.match): +    """Scan the string s for a JSON string. End is the index of the +    character in s after the quote that started the JSON string. +    Unescapes all valid JSON string escape sequences and raises ValueError +    on attempt to decode an invalid string. If strict is False then literal +    control characters are allowed in the string. + +    Returns a tuple of the decoded string and the index of the character in s +    after the end quote.""" +    if encoding is None: +        encoding = DEFAULT_ENCODING +    chunks = [] +    _append = chunks.append +    begin = end - 1 +    while 1: +        chunk = _m(s, end) +        if chunk is None: +            raise JSONDecodeError( +                "Unterminated string starting at", s, begin) +        end = chunk.end() +        content, terminator = chunk.groups() +        # Content is contains zero or more unescaped string characters +        if content: +            if not isinstance(content, unicode): +                content = unicode(content, encoding) +            _append(content) +        # Terminator is the end of string, a literal control character, +        # or a backslash denoting that an escape sequence follows +        if terminator == '"': +            break +        elif terminator != '\\': +            if strict: +                msg = "Invalid control character %r at" % (terminator,) +                #msg = "Invalid control character {0!r} at".format(terminator) +                raise JSONDecodeError(msg, s, end) +      
      else: +                _append(terminator) +                continue +        try: +            esc = s[end] +        except IndexError: +            raise JSONDecodeError( +                "Unterminated string starting at", s, begin) +        # If not a unicode escape sequence, must be in the lookup table +        if esc != 'u': +            try: +                char = _b[esc] +            except KeyError: +                msg = "Invalid \\escape: " + repr(esc) +                raise JSONDecodeError(msg, s, end) +            end += 1 +        else: +            # Unicode escape sequence +            esc = s[end + 1:end + 5] +            next_end = end + 5 +            if len(esc) != 4: +                msg = "Invalid \\uXXXX escape" +                raise JSONDecodeError(msg, s, end) +            uni = int(esc, 16) +            # Check for surrogate pair on UCS-4 systems +            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: +                msg = "Invalid \\uXXXX\\uXXXX surrogate pair" +                if not s[end + 5:end + 7] == '\\u': +                    raise JSONDecodeError(msg, s, end) +                esc2 = s[end + 7:end + 11] +                if len(esc2) != 4: +                    raise JSONDecodeError(msg, s, end) +                uni2 = int(esc2, 16) +                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) +                next_end += 6 +            char = unichr(uni) +            end = next_end +        # Append the unescaped character +        _append(char) +    return u''.join(chunks), end + + +# Use speedup if available +scanstring = c_scanstring or py_scanstring + +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' + +def JSONObject((s, end), encoding, strict, scan_once, object_hook, +        object_pairs_hook, memo=None, +        _w=WHITESPACE.match, _ws=WHITESPACE_STR): +    # Backwards compatibility +    if memo is None: +        memo = {} +    memo_get = memo.setdefault +    pairs 
= [] +    # Use a slice to prevent IndexError from being raised, the following +    # check will raise a more specific ValueError if the string is empty +    nextchar = s[end:end + 1] +    # Normally we expect nextchar == '"' +    if nextchar != '"': +        if nextchar in _ws: +            end = _w(s, end).end() +            nextchar = s[end:end + 1] +        # Trivial empty object +        if nextchar == '}': +            if object_pairs_hook is not None: +                result = object_pairs_hook(pairs) +                return result, end + 1 +            pairs = {} +            if object_hook is not None: +                pairs = object_hook(pairs) +            return pairs, end + 1 +        elif nextchar != '"': +            raise JSONDecodeError("Expecting property name", s, end) +    end += 1 +    while True: +        key, end = scanstring(s, end, encoding, strict) +        key = memo_get(key, key) + +        # To skip some function call overhead we optimize the fast paths where +        # the JSON key separator is ": " or just ":". 
+        if s[end:end + 1] != ':': +            end = _w(s, end).end() +            if s[end:end + 1] != ':': +                raise JSONDecodeError("Expecting : delimiter", s, end) + +        end += 1 + +        try: +            if s[end] in _ws: +                end += 1 +                if s[end] in _ws: +                    end = _w(s, end + 1).end() +        except IndexError: +            pass + +        try: +            value, end = scan_once(s, end) +        except StopIteration: +            raise JSONDecodeError("Expecting object", s, end) +        pairs.append((key, value)) + +        try: +            nextchar = s[end] +            if nextchar in _ws: +                end = _w(s, end + 1).end() +                nextchar = s[end] +        except IndexError: +            nextchar = '' +        end += 1 + +        if nextchar == '}': +            break +        elif nextchar != ',': +            raise JSONDecodeError("Expecting , delimiter", s, end - 1) + +        try: +            nextchar = s[end] +            if nextchar in _ws: +                end += 1 +                nextchar = s[end] +                if nextchar in _ws: +                    end = _w(s, end + 1).end() +                    nextchar = s[end] +        except IndexError: +            nextchar = '' + +        end += 1 +        if nextchar != '"': +            raise JSONDecodeError("Expecting property name", s, end - 1) + +    if object_pairs_hook is not None: +        result = object_pairs_hook(pairs) +        return result, end +    pairs = dict(pairs) +    if object_hook is not None: +        pairs = object_hook(pairs) +    return pairs, end + +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +    values = [] +    nextchar = s[end:end + 1] +    if nextchar in _ws: +        end = _w(s, end + 1).end() +        nextchar = s[end:end + 1] +    # Look-ahead for trivial empty array +    if nextchar == ']': +        return values, end + 1 +    _append = 
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """
        *encoding* determines the encoding used to interpret any
        :class:`str` objects decoded by this instance (``'utf-8'`` by
        default).  It has no effect when decoding :class:`unicode` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as :class:`unicode`.

        *object_hook*, if specified, will be called with the result of every
        JSON object decoded and its return value will be used in place of the
        given :class:`dict`.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        *object_pairs_hook* is an optional function that will be called with
        the result of any object literal decode with an ordered list of pairs.
        The return value of *object_pairs_hook* will be used instead of the
        :class:`dict`.  This feature can be used to implement custom decoders
        that rely on the order that the key and value pairs are decoded (for
        example, :class:`collections.OrderedDict` will remember the order of
        insertion). If *object_hook* is also defined, the *object_pairs_hook*
        takes priority.

        *parse_float*, if specified, will be called with the string of every
        JSON float to be decoded.  By default, this is equivalent to
        ``float(num_str)``. This can be used to use another datatype or parser
        for JSON floats (e.g. :class:`decimal.Decimal`).

        *parse_int*, if specified, will be called with the string of every
        JSON int to be decoded.  By default, this is equivalent to
        ``int(num_str)``.  This can be used to use another datatype or parser
        for JSON integers (e.g. :class:`float`).

        *parse_constant*, if specified, will be called with one of the
        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
        can be used to raise an exception if invalid JSON numbers are
        encountered.

        *strict* controls the parser's behavior when it encounters an
        invalid control character in a string. The default setting of
        ``True`` means that unescaped control characters are parse errors, if
        ``False`` then control characters will be allowed in strings.

        """
        self.encoding = encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Falsy hooks (including ``None``) fall back to the built-in parsers.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # The scanner reads the attributes assigned above from ``self``,
        # so it has to be built last.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        Leading and trailing whitespace around the document is skipped.
        Raises ``JSONDecodeError`` ("Extra data") if anything other than
        whitespace follows the document.

        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        if end != len(s):
            raise JSONDecodeError("Extra data", s, end, len(s))
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        *idx* is the offset in ``s`` at which the document starts and must
        not be negative.  Raises ``JSONDecodeError`` if *idx* is negative or
        if no JSON value can be decoded at that position.

        """
        if idx < 0:
            # A negative offset must never reach the scanner: string indexing
            # would wrap around to the end of ``s`` and the regex matcher
            # would clamp the position to 0, so a value could be "decoded"
            # from a position the caller never asked for.
            raise JSONDecodeError("No JSON object could be decoded", s, idx)
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise JSONDecodeError("No JSON object could be decoded", s, idx)
        return obj, end
  #return '\\u{0:04x}'.format(n) +                return '\\u%04x' % (n,) +            else: +                # surrogate pair +                n -= 0x10000 +                s1 = 0xd800 | ((n >> 10) & 0x3ff) +                s2 = 0xdc00 | (n & 0x3ff) +                #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) +                return '\\u%04x\\u%04x' % (s1, s2) +    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +encode_basestring_ascii = ( +    c_encode_basestring_ascii or py_encode_basestring_ascii) + +class JSONEncoder(object): +    """Extensible JSON <http://json.org> encoder for Python data structures. + +    Supports the following objects and types by default: + +    +-------------------+---------------+ +    | Python            | JSON          | +    +===================+===============+ +    | dict, namedtuple  | object        | +    +-------------------+---------------+ +    | list, tuple       | array         | +    +-------------------+---------------+ +    | str, unicode      | string        | +    +-------------------+---------------+ +    | int, long, float  | number        | +    +-------------------+---------------+ +    | True              | true          | +    +-------------------+---------------+ +    | False             | false         | +    +-------------------+---------------+ +    | None              | null          | +    +-------------------+---------------+ + +    To extend this to recognize other objects, subclass and implement a +    ``.default()`` method with another method that returns a serializable +    object for ``o`` if possible, otherwise it should call the superclass +    implementation (to raise ``TypeError``). 
+ +    """ +    item_separator = ', ' +    key_separator = ': ' +    def __init__(self, skipkeys=False, ensure_ascii=True, +            check_circular=True, allow_nan=True, sort_keys=False, +            indent=None, separators=None, encoding='utf-8', default=None, +            use_decimal=True, namedtuple_as_object=True, +            tuple_as_array=True): +        """Constructor for JSONEncoder, with sensible defaults. + +        If skipkeys is false, then it is a TypeError to attempt +        encoding of keys that are not str, int, long, float or None.  If +        skipkeys is True, such items are simply skipped. + +        If ensure_ascii is true, the output is guaranteed to be str +        objects with all incoming unicode characters escaped.  If +        ensure_ascii is false, the output will be unicode object. + +        If check_circular is true, then lists, dicts, and custom encoded +        objects will be checked for circular references during encoding to +        prevent an infinite recursion (which would cause an OverflowError). +        Otherwise, no such check takes place. + +        If allow_nan is true, then NaN, Infinity, and -Infinity will be +        encoded as such.  This behavior is not JSON specification compliant, +        but is consistent with most JavaScript based encoders and decoders. +        Otherwise, it will be a ValueError to encode such floats. + +        If sort_keys is true, then the output of dictionaries will be +        sorted by key; this is useful for regression tests to ensure +        that JSON serializations can be compared on a day-to-day basis. + +        If indent is a string, then JSON array elements and object members +        will be pretty-printed with a newline followed by that string repeated +        for each level of nesting. ``None`` (the default) selects the most compact +        representation without any newlines. 
For backwards compatibility with +        versions of simplejson earlier than 2.1.0, an integer is also accepted +        and is converted to a string with that many spaces. + +        If specified, separators should be a (item_separator, key_separator) +        tuple.  The default is (', ', ': ').  To get the most compact JSON +        representation you should specify (',', ':') to eliminate whitespace. + +        If specified, default is a function that gets called for objects +        that can't otherwise be serialized.  It should return a JSON encodable +        version of the object or raise a ``TypeError``. + +        If encoding is not None, then all input strings will be +        transformed into unicode using that encoding prior to JSON-encoding. +        The default is UTF-8. + +        If use_decimal is true (not the default), ``decimal.Decimal`` will +        be supported directly by the encoder. For the inverse, decode JSON +        with ``parse_float=decimal.Decimal``. + +        If namedtuple_as_object is true (the default), tuple subclasses with +        ``_asdict()`` methods will be encoded as JSON objects. +         +        If tuple_as_array is true (the default), tuple (and subclasses) will +        be encoded as JSON arrays. 
+        """ + +        self.skipkeys = skipkeys +        self.ensure_ascii = ensure_ascii +        self.check_circular = check_circular +        self.allow_nan = allow_nan +        self.sort_keys = sort_keys +        self.use_decimal = use_decimal +        self.namedtuple_as_object = namedtuple_as_object +        self.tuple_as_array = tuple_as_array +        if isinstance(indent, (int, long)): +            indent = ' ' * indent +        self.indent = indent +        if separators is not None: +            self.item_separator, self.key_separator = separators +        elif indent is not None: +            self.item_separator = ',' +        if default is not None: +            self.default = default +        self.encoding = encoding + +    def default(self, o): +        """Implement this method in a subclass such that it returns +        a serializable object for ``o``, or calls the base implementation +        (to raise a ``TypeError``). + +        For example, to support arbitrary iterators, you could +        implement default like this:: + +            def default(self, o): +                try: +                    iterable = iter(o) +                except TypeError: +                    pass +                else: +                    return list(iterable) +                return JSONEncoder.default(self, o) + +        """ +        raise TypeError(repr(o) + " is not JSON serializable") + +    def encode(self, o): +        """Return a JSON string representation of a Python data structure. + +        >>> from simplejson import JSONEncoder +        >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) +        '{"foo": ["bar", "baz"]}' + +        """ +        # This is for extremely simple cases and benchmarks. 
+        if isinstance(o, basestring): +            if isinstance(o, str): +                _encoding = self.encoding +                if (_encoding is not None +                        and not (_encoding == 'utf-8')): +                    o = o.decode(_encoding) +            if self.ensure_ascii: +                return encode_basestring_ascii(o) +            else: +                return encode_basestring(o) +        # This doesn't pass the iterator directly to ''.join() because the +        # exceptions aren't as detailed.  The list call should be roughly +        # equivalent to the PySequence_Fast that ''.join() would do. +        chunks = self.iterencode(o, _one_shot=True) +        if not isinstance(chunks, (list, tuple)): +            chunks = list(chunks) +        if self.ensure_ascii: +            return ''.join(chunks) +        else: +            return u''.join(chunks) + +    def iterencode(self, o, _one_shot=False): +        """Encode the given object and yield each string +        representation as available. + +        For example:: + +            for chunk in JSONEncoder().iterencode(bigobject): +                mysocket.write(chunk) + +        """ +        if self.check_circular: +            markers = {} +        else: +            markers = None +        if self.ensure_ascii: +            _encoder = encode_basestring_ascii +        else: +            _encoder = encode_basestring +        if self.encoding != 'utf-8': +            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): +                if isinstance(o, str): +                    o = o.decode(_encoding) +                return _orig_encoder(o) + +        def floatstr(o, allow_nan=self.allow_nan, +                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): +            # Check for specials. Note that this type of test is processor +            # and/or platform-specific, so do tests which don't depend on +            # the internals. 
+ +            if o != o: +                text = 'NaN' +            elif o == _inf: +                text = 'Infinity' +            elif o == _neginf: +                text = '-Infinity' +            else: +                return _repr(o) + +            if not allow_nan: +                raise ValueError( +                    "Out of range float values are not JSON compliant: " + +                    repr(o)) + +            return text + + +        key_memo = {} +        if (_one_shot and c_make_encoder is not None +                and self.indent is None): +            _iterencode = c_make_encoder( +                markers, self.default, _encoder, self.indent, +                self.key_separator, self.item_separator, self.sort_keys, +                self.skipkeys, self.allow_nan, key_memo, self.use_decimal, +                self.namedtuple_as_object, self.tuple_as_array) +        else: +            _iterencode = _make_iterencode( +                markers, self.default, _encoder, self.indent, floatstr, +                self.key_separator, self.item_separator, self.sort_keys, +                self.skipkeys, _one_shot, self.use_decimal, +                self.namedtuple_as_object, self.tuple_as_array) +        try: +            return _iterencode(o, 0) +        finally: +            key_memo.clear() + + +class JSONEncoderForHTML(JSONEncoder): +    """An encoder that produces JSON safe to embed in HTML. + +    To embed JSON content in, say, a script tag on a web page, the +    characters &, < and > should be escaped. They cannot be escaped +    with the usual entities (e.g. &) because they are not expanded +    within <script> tags. +    """ + +    def encode(self, o): +        # Override JSONEncoder.encode because it has hacks for +        # performance that make things more complicated. 
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal, _namedtuple_as_object, _tuple_as_array,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        Decimal=Decimal,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build the pure-Python chunk generator used for encoding.

    Returns the inner ``_iterencode(o, _current_indent_level)`` generator
    function, closed over the settings given here:

    *markers* is a dict keyed by ``id()`` of the containers currently being
    encoded (used to detect circular references), or ``None`` to skip that
    check.

    *_default* is called with any object of an unsupported type; its return
    value is encoded in place of the object.

    *_encoder* is called with string values and with every dict key to
    produce the quoted JSON string chunk.  *_floatstr* is called with float
    values and float keys.

    *_indent* is the string repeated once per nesting level after each
    newline, or ``None`` to emit no newlines at all.

    *_key_separator* is yielded between a key and its value and
    *_item_separator* between consecutive items.

    *_sort_keys*: when true, dict items are sorted by key before encoding.

    *_skipkeys*: when true, dict keys of an unsupported type are skipped;
    otherwise they raise ``TypeError``.

    *_one_shot* is accepted but not referenced anywhere in this function.

    *_use_decimal*, *_namedtuple_as_object* and *_tuple_as_array* enable
    the corresponding type checks in the generators below.

    The remaining keyword parameters only rebind builtins as locals (see
    the HACK note in the signature).

    """

    def _iterencode_list(lst, _current_indent_level):
        """Yield the JSON array chunks for the sequence ``lst``."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` is the prefix glued onto the next yielded chunk: the
        # opening bracket for the first item, the separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                # Containers and unknown types: emit the prefix on its own,
                # then delegate to the matching sub-generator.
                yield buf
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif (_namedtuple_as_object and isinstance(value, tuple) and
                        hasattr(value, '_asdict')):
                    chunks = _iterencode_dict(value._asdict(),
                                              _current_indent_level)
                elif _tuple_as_array and isinstance(value, tuple):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        # The container is fully emitted, so it may legitimately appear
        # again elsewhere in the structure.
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the JSON object chunks for the mapping ``dct``."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            # ``first`` is only cleared once a key has actually been kept,
            # so skipped keys never produce a stray separator.
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif (_namedtuple_as_object and isinstance(value, tuple) and
                        hasattr(value, '_asdict')):
                    chunks = _iterencode_dict(value._asdict(),
                                              _current_indent_level)
                elif _tuple_as_array and isinstance(value, tuple):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON chunks for an arbitrary object ``o``.

        Objects of an unsupported type are passed to ``_default`` and the
        result is encoded recursively.

        """
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, list):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif (_namedtuple_as_object and isinstance(o, tuple) and
                hasattr(o, '_asdict')):
            for chunk in _iterencode_dict(o._asdict(), _current_indent_level):
                yield chunk
        elif (_tuple_as_array and isinstance(o, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        elif _use_decimal and isinstance(o, Decimal):
            yield str(o)
        else:
            # The original object is tracked while its replacement is being
            # encoded, so a ``_default`` result that leads back to ``o`` is
            # reported as a circular reference.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode
dict.__setitem__(self, key, value) + +    def __delitem__(self, key): +        dict.__delitem__(self, key) +        key, prev, next = self.__map.pop(key) +        prev[2] = next +        next[1] = prev + +    def __iter__(self): +        end = self.__end +        curr = end[2] +        while curr is not end: +            yield curr[0] +            curr = curr[2] + +    def __reversed__(self): +        end = self.__end +        curr = end[1] +        while curr is not end: +            yield curr[0] +            curr = curr[1] + +    def popitem(self, last=True): +        if not self: +            raise KeyError('dictionary is empty') +        # Modified from original to support Python 2.4, see +        # http://code.google.com/p/simplejson/issues/detail?id=53 +        if last: +            key = reversed(self).next() +        else: +            key = iter(self).next() +        value = self.pop(key) +        return key, value + +    def __reduce__(self): +        items = [[k, self[k]] for k in self] +        tmp = self.__map, self.__end +        del self.__map, self.__end +        inst_dict = vars(self).copy() +        self.__map, self.__end = tmp +        if inst_dict: +            return (self.__class__, (items,), inst_dict) +        return self.__class__, (items,) + +    def keys(self): +        return list(self) + +    setdefault = DictMixin.setdefault +    update = DictMixin.update +    pop = DictMixin.pop +    values = DictMixin.values +    items = DictMixin.items +    iterkeys = DictMixin.iterkeys +    itervalues = DictMixin.itervalues +    iteritems = DictMixin.iteritems + +    def __repr__(self): +        if not self: +            return '%s()' % (self.__class__.__name__,) +        return '%s(%r)' % (self.__class__.__name__, self.items()) + +    def copy(self): +        return self.__class__(self) + +    @classmethod +    def fromkeys(cls, iterable, value=None): +        d = cls() +        for key in iterable: +            d[key] = value +        return d + 
# Groups: (integer part)(optional fraction)(optional exponent).
NUMBER_RE = re.compile(
    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
    (re.VERBOSE | re.MULTILINE | re.DOTALL))

def py_make_scanner(context):
    """Return a pure-Python ``scan_once(string, idx)`` function.

    *context* supplies the parsing callbacks and settings; the attributes
    read from it are ``parse_object``, ``parse_array``, ``parse_string``,
    ``parse_float``, ``parse_int``, ``parse_constant``, ``encoding``,
    ``strict``, ``object_hook``, ``object_pairs_hook`` and ``memo``.

    The returned function decodes the single JSON value that starts at
    ``string[idx]`` and returns a ``(value, end)`` tuple, where *end* is the
    index just past the value.  It raises ``StopIteration`` when *idx* is
    negative or past the end of *string*, or when no value starts at that
    position.  ``context.memo`` is cleared after every top-level call.

    """
    parse_object = context.parse_object
    parse_array = context.parse_array
    parse_string = context.parse_string
    match_number = NUMBER_RE.match
    encoding = context.encoding
    strict = context.strict
    parse_float = context.parse_float
    parse_int = context.parse_int
    parse_constant = context.parse_constant
    object_hook = context.object_hook
    object_pairs_hook = context.object_pairs_hook
    memo = context.memo

    def _scan_once(string, idx):
        # Recursive worker; also handed to the object/array parsers so they
        # can scan nested values.
        try:
            nextchar = string[idx]
        except IndexError:
            raise StopIteration

        if nextchar == '"':
            return parse_string(string, idx + 1, encoding, strict)
        elif nextchar == '{':
            return parse_object((string, idx + 1), encoding, strict,
                _scan_once, object_hook, object_pairs_hook, memo)
        elif nextchar == '[':
            return parse_array((string, idx + 1), _scan_once)
        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
            return None, idx + 4
        elif nextchar == 't' and string[idx:idx + 4] == 'true':
            return True, idx + 4
        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
            return False, idx + 5

        m = match_number(string, idx)
        if m is not None:
            integer, frac, exp = m.groups()
            if frac or exp:
                res = parse_float(integer + (frac or '') + (exp or ''))
            else:
                res = parse_int(integer)
            return res, m.end()
        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
            return parse_constant('NaN'), idx + 3
        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
            return parse_constant('Infinity'), idx + 8
        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
            return parse_constant('-Infinity'), idx + 9
        else:
            raise StopIteration

    def scan_once(string, idx):
        try:
            if idx < 0:
                # ``string[idx]`` would wrap around to the end of the string
                # and the regex matcher would clamp the position to 0, so a
                # negative index could "decode" a value from somewhere the
                # caller never pointed at.  Treat it like any other position
                # that holds no value.
                raise StopIteration
            return _scan_once(string, idx)
        finally:
            memo.clear()

    return scan_once
[outfile]]") +    try: +        obj = json.load(infile, +                        object_pairs_hook=json.OrderedDict, +                        use_decimal=True) +    except ValueError, e: +        raise SystemExit(e) +    json.dump(obj, outfile, sort_keys=True, indent='    ', use_decimal=True) +    outfile.write('\n') + + +if __name__ == '__main__': +    main() | 
