"""Implementation of JSONDecoder
"""
import binascii
import re
import struct
import sys

from simplejson.scanner import make_scanner
try:
    from simplejson._speedups import scanstring as c_scanstring
except ImportError:
    c_scanstring = None

# Names exported by ``from <this module> import *``: only the decoder class.
__all__ = ['JSONDecoder']

# Flags passed to the regular expressions compiled in this module
# (re.X is re.VERBOSE, re.M is re.MULTILINE, re.S is re.DOTALL).
FLAGS = re.X | re.M | re.S

def _floatconstants():

_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
if sys.byteorder != 'big':
    _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
nan, inf = struct.unpack('dd', _BYTES)
return nan, inf, -inf

NaN, PosInf, NegInf = _floatconstants()

def linecol(doc, pos):
    """Return the ``(line, column)`` of character index ``pos`` in ``doc``.

    Both numbers are 1-based: the first character of every line, including
    the first line, is reported as column 1.
    """
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        # No preceding newline to measure from; shift the 0-based index so
        # the first line uses the same 1-based columns as later lines.
        colno = pos + 1
    else:
        # Distance from the newline that starts this line.
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno

def errmsg(msg, doc, pos, end=None):
    """Return ``msg`` followed by the location of ``pos`` within ``doc``.

    When ``end`` is given the location is reported as a range running
    from ``pos`` to ``end``; otherwise only the single position is shown.
    Locations are given as line/column (from ``linecol``) plus the raw
    character index.
    """
    # Note that this function is called from _speedups
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return template % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '%s: line %d column %d (char %d)'
    return template % (msg, start_line, start_col, pos)

# Literal name -> float value for the non-finite number spellings.
_CONSTANTS = {
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}

# Group 1 is a (possibly empty) run of ordinary string characters; group 2
# is the single character that ended the run: the closing quote, a
# backslash starting an escape sequence, or a literal control character
# in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Maps the character following a backslash to the text it stands for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

DEFAULT_ENCODING = "utf-8"

def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to decode non-unicode chunks of ``s`` and defaults
    to ``DEFAULT_ENCODING``.  ``_b`` and ``_m`` are the escape lookup table
    and the chunk matcher, bound as defaults so they are local names.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote.

    Raises ValueError for an unterminated string, a literal control
    character in strict mode, an unknown backslash escape, or a malformed
    ``\\uXXXX`` escape or surrogate pair."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated string" errors.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            if len(esc) != 4:
                raise ValueError(errmsg(msg, s, end))
            try:
                uni = int(esc, 16)
            except ValueError:
                # Report non-hex digits with the document position instead
                # of letting int()'s generic message escape.
                raise ValueError(errmsg(msg, s, end))
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise ValueError(errmsg(msg, s, end))
                esc2 = s[end + 7:end + 11]
                if len(esc2) != 4:
                    raise ValueError(errmsg(msg, s, end))
                try:
                    uni2 = int(esc2, 16)
                except ValueError:
                    raise ValueError(errmsg(msg, s, end))
                # The second escape must be a low surrogate; otherwise the
                # arithmetic below would produce an unrelated code point.
                if not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end

# Use speedup if available
scanstring = c_scanstring or py_scanstring

# Whitespace here is space, tab, line feed and carriage return.
# WHITESPACE matches a (possibly empty) run of those characters;
# WHITESPACE_STR holds the same characters for cheap ``in`` membership tests.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'

def JSONObject(state, encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object into a dict.

    ``state`` is a 2-tuple ``(s, end)``: the document and the index at
    which the first property name (or the closing brace) is expected.
    ``encoding`` and ``strict`` are passed through to ``scanstring`` when
    reading property names.  ``scan_once(s, idx)`` must return
    ``(value, end)`` and raise StopIteration when no value starts at
    ``idx``.  ``object_hook``, if not None, is called with the finished
    dict (including an empty one) and its result is returned instead.

    Returns ``(pairs, end)`` where ``end`` is the index just past the
    closing brace.  Raises ValueError on malformed input.
    """
    # Accept the (s, end) pair as one positional argument and unpack it
    # here; tuple parameters in the signature are not valid in Python 3.
    s, end = state
    pairs = {}
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            # The hook is documented as receiving every decoded object,
            # so the empty object must go through it as well.
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting : delimiter", s, end))

        end += 1

        # Skip whitespace after the colon; running off the end of the
        # document is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value

        # Read the delimiter after the value; '' stands for end of input.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # After a comma the next non-whitespace character must open the
        # next property name.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))

    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end

def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array into a list.

    ``state`` is a 2-tuple ``(s, end)``: the document and the index at
    which the first element (or the closing bracket) is expected.
    ``scan_once(s, idx)`` must return ``(value, end)`` and raise
    StopIteration when no value starts at ``idx``.

    Returns ``(values, end)`` where ``end`` is the index just past the
    closing bracket.  Raises ValueError on malformed input.
    """
    # Accept the (s, end) pair as one positional argument and unpack it
    # here; tuple parameters in the signature are not valid in Python 3.
    s, end = state
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character, so
            # step back one to report where the delimiter was expected.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # Skip whitespace after the comma; running off the end of the
        # document is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end

class JSONDecoder(object):
    """Decoder that turns JSON text (<http://json.org>) into Python objects.

    Unless customised, JSON values are translated as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    Although they are not part of the JSON spec, ``NaN``, ``Infinity``
    and ``-Infinity`` are also accepted and decoded to the matching
    ``float`` values.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """Configure the decoder.

        ``encoding`` names the encoding used to interpret ``str`` input
        (utf-8 when left as None); it is ignored for ``unicode`` input.
        Only encodings that are a superset of ASCII currently work, so
        text in any other encoding should be passed in as ``unicode``.

        ``object_hook``, when given, is called with the result of every
        decoded JSON object, and whatever it returns is used in place of
        the ``dict``.  This allows custom deserializations (e.g. JSON-RPC
        class hinting).

        ``parse_float``, when given, is called with the string of every
        JSON float; the default is equivalent to float(num_str).  Use it
        to substitute another datatype or parser (e.g. decimal.Decimal).

        ``parse_int``, when given, is called with the string of every
        JSON int; the default is equivalent to int(num_str).  Use it to
        substitute another datatype or parser (e.g. float).

        ``parse_constant``, when given, is called with one of the strings
        -Infinity, Infinity, NaN.  It can be used to raise an exception
        when such invalid JSON numbers are encountered.

        ``strict`` is stored on the instance as given.

        """
        # Plain settings stored as given.
        self.strict = strict
        self.encoding = encoding
        self.object_hook = object_hook
        # Converters: fall back to the built-in behaviour when not supplied.
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.parse_int = parse_int or int
        self.parse_float = parse_float or float
        # Sub-parsers for the composite and string value types.
        self.parse_string = scanstring
        self.parse_array = JSONArray
        self.parse_object = JSONObject
        # Built last: make_scanner receives this fully configured instance.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s`` (a ``str`` or ``unicode`` holding exactly one JSON
        document) and return its Python representation.

        Whitespace around the document is skipped; anything else left
        after the document raises ValueError.

        """
        document, stop = self.raw_decode(s, idx=_w(s, 0).end())
        stop = _w(s, stop).end()
        if stop == len(s):
            return document
        raise ValueError(errmsg("Extra data", s, stop, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at index ``idx`` of ``s``
        (a ``str`` or ``unicode``) and return a 2-tuple of its Python
        representation and the index in ``s`` where the document ended.

        Unlike ``decode``, trailing data after the document is allowed.
        Raises ValueError when no document can be decoded.

        """
        try:
            document, stop = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return document, stop