class Muskox::Pure::Lexer

Constants

ARRAY_CLOSE
ARRAY_OPEN
COLLECTION_DELIMITER
EMPTY_8BIT_STRING
FALSE
FLOAT
IGNORE
INFINITY
INTEGER
MINUS_INFINITY
NAN
NULL
OBJECT_CLOSE
OBJECT_OPEN
PAIR_DELIMITER
STRING
TRUE
UNESCAPE_MAP

Unescape characters in strings.

UNPARSED

Public Class Methods

new(source, opts = {}) click to toggle source

Creates a new JSON::Pure::Parser instance for the string source.

It will be configured by the opts hash. opts can have the following keys:

  • max_nesting: The maximum depth of nesting allowed in the parsed data structures. Disable depth checking with :max_nesting => false|nil|0, it defaults to 100.

  • allow_nan: If set to true, allow NaN, Infinity and -Infinity in defiance of RFC 4627 to be parsed by the Parser. This option defaults to false.

  • symbolize_names: If set to true, returns symbols for the names (keys) in a JSON object. Otherwise strings are returned, which is also the default.

  • quirks_mode: Enables quirks_mode for parser, that is for example parsing single JSON values instead of documents is possible.

Calls superclass method
# File lib/muskox/json_lexer.rb, line 69
def initialize(source, opts = {})
  opts ||= {}
  unless @quirks_mode = opts[:quirks_mode]
    source = convert_encoding source
  end
  super source
  if !opts.key?(:max_nesting) # defaults to 100
    @max_nesting = 100
  elsif opts[:max_nesting]
    @max_nesting = opts[:max_nesting]
  else
    @max_nesting = 0
  end
  @allow_nan = !!opts[:allow_nan]
  @symbolize_names = !!opts[:symbolize_names]
  @match_string = opts[:match_string]
end

Public Instance Methods

lex(&block) click to toggle source

Parses the current JSON string source and returns the complete data structure as a result.

# File lib/muskox/json_lexer.rb, line 100
      def lex &block
        @callback = block
        reset
        if @quirks_mode
          while !eos? && skip(IGNORE)
          end
          if eos?
            raise ParserError, "source did not contain any JSON!"
          else
            obj = lex_value
            obj == UNPARSED and raise ParserError, "source did not contain any JSON!"
          end
        else
          until eos?
            case
            when scan(OBJECT_OPEN)
#              obj and raise ParserError, "source '#{peek(20)}' not in JSON!"
              @current_nesting = 1
              lex_object
            when scan(ARRAY_OPEN)
#              obj and raise ParserError, "source '#{peek(20)}' not in JSON!"
              @current_nesting = 1
              lex_array
            when skip(IGNORE)
              ;
            else
              raise ParserError, "source '#{peek(20)}' not in JSON!"
            end
          end
#          obj or raise ParserError, "source did not contain any JSON!"
        end
#        obj
      end
quirks_mode?() click to toggle source
# File lib/muskox/json_lexer.rb, line 89
def quirks_mode?
  !!@quirks_mode
end
reset() click to toggle source
Calls superclass method
# File lib/muskox/json_lexer.rb, line 93
def reset
  super
  @current_nesting = 0
end

Private Instance Methods

convert_encoding(source) click to toggle source
# File lib/muskox/json_lexer.rb, line 136
def convert_encoding(source)
  if source.respond_to?(:to_str)
    source = source.to_str
  else
    raise TypeError, "#{source.inspect} is not like a string"
  end
  if defined?(::Encoding)
    if source.encoding == ::Encoding::ASCII_8BIT
      b = source[0, 4].bytes.to_a
      source =
        case
        when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0
          source.dup.force_encoding(::Encoding::UTF_32BE).encode!(::Encoding::UTF_8)
        when b.size >= 4 && b[0] == 0 && b[2] == 0
          source.dup.force_encoding(::Encoding::UTF_16BE).encode!(::Encoding::UTF_8)
        when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0
          source.dup.force_encoding(::Encoding::UTF_32LE).encode!(::Encoding::UTF_8)
        when b.size >= 4 && b[1] == 0 && b[3] == 0
          source.dup.force_encoding(::Encoding::UTF_16LE).encode!(::Encoding::UTF_8)
        else
          source.dup
        end
    else
      source = source.encode(::Encoding::UTF_8)
    end
    source.force_encoding(::Encoding::ASCII_8BIT)
  else
    b = source
    source =
      case
      when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0
        JSON.iconv('utf-8', 'utf-32be', b)
      when b.size >= 4 && b[0] == 0 && b[2] == 0
        JSON.iconv('utf-8', 'utf-16be', b)
      when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0
        JSON.iconv('utf-8', 'utf-32le', b)
      when b.size >= 4 && b[1] == 0 && b[3] == 0
        JSON.iconv('utf-8', 'utf-16le', b)
      else
        b
      end
  end
  source
end
lex_array() click to toggle source
# File lib/muskox/json_lexer.rb, line 260
def lex_array
  raise NestingError, "nesting of #@current_nesting is too deep" if
    @max_nesting.nonzero? && @current_nesting > @max_nesting
  @callback.call :array_begin, nil
  delim = false
  until eos?
    case
    when (value = lex_value) != UNPARSED
      delim = false

      skip(IGNORE)
      if scan(COLLECTION_DELIMITER)
        delim = true
      elsif match?(ARRAY_CLOSE)
        ;
      else
        raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!"
      end
    when scan(ARRAY_CLOSE)
      if delim
        raise ParserError, "expected next element in array at '#{peek(20)}'!"
      end
      break
    when skip(IGNORE)
      ;
    else
      raise ParserError, "unexpected token in array at '#{peek(20)}'!"
    end
  end
  @callback.call :array_end, nil
end
lex_object() click to toggle source
# File lib/muskox/json_lexer.rb, line 292
def lex_object
  raise NestingError, "nesting of #@current_nesting is too deep" if
    @max_nesting.nonzero? && @current_nesting > @max_nesting


  @callback.call :object_begin, nil
  delim = false
  until eos?
    case
    when (string = parse_string) != UNPARSED
      @callback.call :property, string
      skip(IGNORE)
      unless scan(PAIR_DELIMITER)
        raise ParserError, "expected ':' in object at '#{peek(20)}'!"
      end
      skip(IGNORE)
      unless (value = lex_value).equal? UNPARSED
        delim = false
        skip(IGNORE)
        if scan(COLLECTION_DELIMITER)
          delim = true
        elsif match?(OBJECT_CLOSE)
          ;
        else
          raise ParserError, "expected ',' or '}' in object at '#{peek(20)}'!"
        end
      else
        raise ParserError, "expected value in object at '#{peek(20)}'!"
      end
    when scan(OBJECT_CLOSE)
      if delim
        raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!"
      end
      break
    when skip(IGNORE)
      ;
    else
      raise ParserError, "unexpected token in object at '#{peek(20)}'!"
    end
  end
  @callback.call :object_end, nil
end
lex_value() click to toggle source
# File lib/muskox/json_lexer.rb, line 227
      def lex_value
        case
        when scan(FLOAT)
          @callback.call :float, Float(self[1])
        when scan(INTEGER)
          @callback.call :integer, Integer(self[1])
        when scan(TRUE)
          @callback.call :boolean, true
        when scan(FALSE)
          @callback.call :boolean, false
        when scan(NULL)
          @callback.call :null, nil
        when (string = parse_string) != UNPARSED
          @callback.call :string, string
        when scan(ARRAY_OPEN)
          @current_nesting += 1
          lex_array
          @current_nesting -= 1
        when scan(OBJECT_OPEN)
          @current_nesting += 1
          lex_object
          @current_nesting -= 1
#        when @allow_nan && scan(NAN)
#          NaN
#        when @allow_nan && scan(INFINITY)
#          Infinity
#        when @allow_nan && scan(MINUS_INFINITY)
#          MinusInfinity
        else
          UNPARSED
        end
      end
parse_string() click to toggle source
# File lib/muskox/json_lexer.rb, line 200
def parse_string
  if scan(STRING)
    return '' if self[1].empty?
    string = self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |c|
      if u = UNESCAPE_MAP[$&[1]]
        u
      else # \uXXXX
        bytes = EMPTY_8BIT_STRING.dup
        i = 0
        while c[6 * i] == ?\\ && c[6 * i + 1] == ?u
          bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16)
          i += 1
        end
        JSON.iconv('utf-8', 'utf-16be', bytes)
      end
    end
    if string.respond_to?(:force_encoding)
      string.force_encoding(::Encoding::UTF_8)
    end
    string
  else
    UNPARSED
  end
rescue => e
  raise ParserError, "Caught #{e.class} at '#{peek(20)}': #{e}"
end