class JsonProjection::Parser

A streaming JSON parser that generates SAX-like events for state changes. Use the json gem for small documents. Use this for huge documents that won't fit in memory.

Constants

BACKSLASH
COLON
COMMA
CONTROL
DIGIT
DIGIT_1_9
DIGIT_END
EXPONENT
FALSE_KEYWORD
FALSE_RE
HEX
LEFT_BRACE
LEFT_BRACKET
MINUS
NULL_KEYWORD
NULL_RE
PLUS
POINT
QUOTE
RIGHT_BRACE
RIGHT_BRACKET
SLASH
TRUE_KEYWORD
TRUE_RE
U
WS
ZERO

Public Class Methods

new(stream, chunk_size = 4096) click to toggle source

Initialize a new parser with a stream. The stream cursor is advanced as events are drawn from the parser. The parser maintains a small data cache of bytes read from the stream.

stream

IO IO stream to read data from.

chunk_size

Integer Number of bytes to read from the stream at a time.

Returns nothing.

# File lib/json-projection/parser.rb, line 76
# Set up a parser that pulls its input from +stream+.
#
# stream     - The object to read from; the parser calls #read(chunk_size) on it.
# chunk_size - The Integer handed to every stream.read call (default: 4096).
def initialize(stream, chunk_size = 4096)
  # Input source and how much to request from it per read.
  @stream, @chunk_size = stream, chunk_size

  # Events already produced but not yet handed to the caller.
  @event_buffer = Fifo.new

  # Raw chunk buffer, the characters of the current chunk, and the index of
  # the next character to consume from it.
  @character_buffer = Buffer.new
  @characters = nil
  @characters_cursor = -1

  # State machine: current state, open-container/string stack, and the
  # position used in error messages (incremented before each character is
  # handled, so the first character is reported as 0).
  @state = :start_document
  @stack = []
  @stream_position = -1

  # Scratch buffers for the scalar being read and for \uXXXX hex digits.
  @value_buffer = ""
  @unicode = ""
end

Public Instance Methods

next_event() click to toggle source

Draw bytes from the stream until an event can be constructed. May raise IO errors.

Returns a JsonProjection::StreamEvent subclass or raises StandardError.

# File lib/json-projection/parser.rb, line 99
# Draw characters from the stream until at least one event can be returned.
#
# Events produced earlier but not yet delivered are returned first. Otherwise
# characters are fed one at a time to #handle_character until it yields an
# event.
#
# Returns the next event.
# Raises ParseError (via #error) when called again after the document has
# ended, or when the stream runs out before the document is complete. Errors
# raised by the stream's #read propagate unchanged.
def next_event
  # Serve events that were already parsed before touching the stream.
  event = @event_buffer.pop!
  return event unless event.nil?

  error("already EOF, no more events") if @state == :end_document

  # `while true` rather than `loop`: Kernel#loop rescues StopIteration, which
  # would silently swallow one raised by an enumerator-backed stream.
  while true
    # Refill until there is a character to consume. A single read may yield
    # no characters at all when the character buffer returns an empty string
    # for that chunk, so one refill attempt is not enough.
    while @characters.nil? || @characters_cursor >= @characters.size
      data = stream.read(@chunk_size)
      error("unexpected EOF") if data.nil?

      @characters = @character_buffer.<<(data).each_char.to_a
      @characters_cursor = 0
    end

    character = @characters[@characters_cursor]
    @characters_cursor += 1
    @stream_position += 1

    @state, new_events = handle_character(@state, character)
    @event_buffer.prepend!(new_events)

    event = @event_buffer.pop!
    return event unless event.nil?
  end
end

Private Instance Methods

end_container(type) click to toggle source

Complete an object or array container value type.

type - The Symbol, :object or :array, of the expected type.

Raises a JsonProjection::ParseError if the expected container type was not completed.

Returns a tuple of (Symbol, Fifo<Event>) or raises a JsonProjection::ParseError if the innermost open container is not of the expected type.

# File lib/json-projection/parser.rb, line 486
# Close the innermost open container and report what that implies.
#
# type - The Symbol (:object or :array) the closing token belongs to.
#
# The top of @stack is popped unconditionally; it must equal +type+.
#
# Returns a tuple of (Symbol next state, Fifo of events). The state is
# :end_document, with an EndDocument event appended, once the stack is empty;
# otherwise it is :end_value.
# Raises ParseError (via #error) when the popped entry is not +type+.
def end_container(type)
  events = Fifo.pure
  error("Expected end of #{type}") unless @stack.pop == type

  if type == :object
    events.push!(EndObject.empty)
  elsif type == :array
    events.push!(EndArray.empty)
  end

  # Something is still open: the closed container was a nested value.
  return :end_value, events unless @stack.empty?

  events.push!(EndDocument.empty)
  return :end_document, events
end
end_value(value) click to toggle source

Construct the event for the value just read. The parser state itself is not changed here; callers decide the next state.

Returns a JsonProjection::StreamEvent subclass.

# File lib/json-projection/parser.rb, line 583
# Wrap a decoded Ruby value in the matching event object.
#
# value - true/false, a Numeric, a String, or nil.
#
# Returns a Boolean, Number, JsonProjection::String or Null event, or nil
# when +value+ is none of the supported kinds.
def end_value(value)
  if value.equal?(true) || value.equal?(false)
    Boolean.new(value)
  elsif value.is_a?(Numeric)
    Number.new(value)
  elsif value.is_a?(::String)
    # Core ::String is the check; the event class shares the bare name.
    JsonProjection::String.new(value)
  elsif value.nil?
    Null.empty
  end
end
error(message) click to toggle source
# File lib/json-projection/parser.rb, line 596
# Abort parsing with a ParseError that records where the parser was.
#
# message - The String describing what was expected or what went wrong.
#
# Raises ParseError with +message+ followed by the current @stream_position.
def error(message)
  located = "#{message}: char #{@stream_position}"
  raise ParseError, located
end
handle_character(state, ch) click to toggle source

Given a state and new character, return a new state and fifo of events to yield to pull callers.

state

Symbol

ch

String

Returns a tuple of (Symbol, Fifo<Event>) or raises StandardError.

# File lib/json-projection/parser.rb, line 148
# Advance the state machine by one character.
#
# state - The Symbol naming the current parser state.
# ch    - The String holding the single character to process.
#
# Mutates @stack, @value_buffer and @unicode as needed for the transition.
#
# Returns a tuple of (Symbol next state, Fifo of events to deliver).
# Raises ParseError (via #error) when +ch+ is not valid in +state+, or when
# +state+ is not a known state.
def handle_character(state, ch)
  case state
  when :start_document
    case ch
    when WS
      return :start_document, Fifo.empty
    when LEFT_BRACE
      @stack.push(:object)

      events = Fifo.pure(StartObject.empty, StartDocument.empty)

      return :start_object, events
    when LEFT_BRACKET
      @stack.push(:array)

      events = Fifo.pure(StartArray.empty, StartDocument.empty)

      return :start_array, events
    end

    error('Expected whitespace, object `{` or array `[` start token')

  when :start_object
    case ch
    when WS
      return :start_object, Fifo.empty
    when QUOTE
      @stack.push(:key)
      return :start_string, Fifo.empty
    when RIGHT_BRACE
      return end_container(:object)
    end

    error('Expected object key `"` start')

  when :start_string
    case ch
    when QUOTE
      # The stack marker pushed at the opening quote says whether this
      # string is a value (:string) or an object key (:key).
      if @stack.pop == :string
        events = Fifo.pure(end_value(@value_buffer.dup))
        @value_buffer.clear

        return :end_value, events
      else # :key
        events = Fifo.pure(Key.new(@value_buffer.dup))
        @value_buffer.clear

        return :end_key, events
      end
    when BACKSLASH
      return :start_escape, Fifo.empty
    when CONTROL
      error('Control characters must be escaped')
    else
      @value_buffer << ch
      return :start_string, Fifo.empty
    end

  when :start_escape
    case ch
    when QUOTE, BACKSLASH, SLASH
      @value_buffer << ch
      return :start_string, Fifo.empty
    when B
      @value_buffer << "\b"
      return :start_string, Fifo.empty
    when F
      @value_buffer << "\f"
      return :start_string, Fifo.empty
    when N
      @value_buffer << "\n"
      return :start_string, Fifo.empty
    when R
      @value_buffer << "\r"
      return :start_string, Fifo.empty
    when T
      @value_buffer << "\t"
      return :start_string, Fifo.empty
    when U
      return :unicode_escape, Fifo.empty
    else
      error('Expected escaped character')
    end

  when :unicode_escape
    case ch
    when HEX
      @unicode << ch
      if @unicode.size == 4
        codepoint = @unicode.slice!(0, 4).hex

        # An Integer on top of the stack is a high surrogate still waiting
        # for its low half. Integer (not the removed Fixnum constant) keeps
        # this check working on current Ruby versions.
        pending_high = @stack[-1].is_a?(Integer)

        if codepoint >= 0xD800 && codepoint <= 0xDBFF
          error('Expected low surrogate pair half') if pending_high
          @stack.push(codepoint)
          return :start_surrogate_pair, Fifo.empty
        elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
          # Check before popping so a lone low surrogate does not consume
          # the enclosing :string/:key marker.
          error('Expected high surrogate pair half') unless pending_high
          high = @stack.pop
          pair = ((high - 0xD800) * 0x400) + (codepoint - 0xDC00) + 0x10000
          @value_buffer << pair
          return :start_string, Fifo.empty
        else
          # A non-surrogate escape cannot complete a pair; accepting it
          # would leave the high half on the stack and make the closing
          # quote be treated as the end of a key.
          error('Expected low surrogate pair half') if pending_high
          @value_buffer << codepoint
          return :start_string, Fifo.empty
        end
      end

      return :unicode_escape, Fifo.empty
    else
      error('Expected unicode escape hex digit')
    end

  when :start_surrogate_pair
    case ch
    when BACKSLASH
      return :start_surrogate_pair_u, Fifo.empty
    else
      error('Expected low surrogate pair half')
    end

  when :start_surrogate_pair_u
    case ch
    when U
      return :unicode_escape, Fifo.empty
    else
      error('Expected low surrogate pair half')
    end

  when :start_negative_number
    case ch
    when ZERO
      @value_buffer << ch
      return :start_zero, Fifo.empty
    when DIGIT_1_9
      @value_buffer << ch
      return :start_int, Fifo.empty
    else
      error('Expected 0-9 digit')
    end

  when :start_zero
    case ch
    when POINT
      @value_buffer << ch
      return :start_float, Fifo.empty
    when EXPONENT
      @value_buffer << ch
      return :start_exponent, Fifo.empty
    else
      return finish_number(@value_buffer.to_i, ch)
    end

  when :start_float
    case ch
    when DIGIT
      @value_buffer << ch
      return :in_float, Fifo.empty
    end

    error('Expected 0-9 digit')

  when :in_float
    case ch
    when DIGIT
      @value_buffer << ch
      return :in_float, Fifo.empty
    when EXPONENT
      @value_buffer << ch
      return :start_exponent, Fifo.empty
    else
      return finish_number(@value_buffer.to_f, ch)
    end

  when :start_exponent
    case ch
    when MINUS, PLUS, DIGIT
      @value_buffer << ch
      return :in_exponent, Fifo.empty
    end

    error('Expected +, -, or 0-9 digit')

  when :in_exponent
    case ch
    when DIGIT
      @value_buffer << ch
      return :in_exponent, Fifo.empty
    else
      # A sign with no digits after it ("1e+") is not a complete exponent.
      error('Expected 0-9 digit') unless @value_buffer =~ DIGIT_END

      return finish_number(@value_buffer.to_f, ch)
    end

  when :start_int
    case ch
    when DIGIT
      @value_buffer << ch
      return :start_int, Fifo.empty
    when POINT
      @value_buffer << ch
      return :start_float, Fifo.empty
    when EXPONENT
      @value_buffer << ch
      return :start_exponent, Fifo.empty
    else
      return finish_number(@value_buffer.to_i, ch)
    end

  when :start_true
    # keyword returns nil until the whole word has been read.
    state, event = keyword(TRUE_KEYWORD, true, TRUE_RE, ch)
    if state.nil?
      return :start_true, Fifo.empty
    end

    return state, Fifo.pure(event)
  when :start_false
    state, event = keyword(FALSE_KEYWORD, false, FALSE_RE, ch)
    if state.nil?
      return :start_false, Fifo.empty
    end

    return state, Fifo.pure(event)
  when :start_null
    state, event = keyword(NULL_KEYWORD, nil, NULL_RE, ch)
    if state.nil?
      return :start_null, Fifo.empty
    end

    return state, Fifo.pure(event)

  when :end_key
    case ch
    when WS
      return :end_key, Fifo.empty
    when COLON
      return :key_sep, Fifo.empty
    end

    error('Expected colon key separator')

  when :key_sep
    case ch
    when WS
      return :key_sep, Fifo.empty
    else
      return start_value(ch)
    end

  when :start_array
    case ch
    when WS
      return :start_array, Fifo.empty
    when RIGHT_BRACKET
      return end_container(:array)
    else
      return start_value(ch)
    end

  when :end_value
    case ch
    when WS
      return :end_value, Fifo.empty
    when COMMA
      return :value_sep, Fifo.empty
    when RIGHT_BRACE
      return end_container(:object)
    when RIGHT_BRACKET
      return end_container(:array)
    end

    error('Expected comma `,` object `}` or array `]` close')

  when :value_sep
    # Inside an object a comma must be followed by the next key.
    if @stack[-1] == :object
      case ch
      when WS
        return :value_sep, Fifo.empty
      when QUOTE
        @stack.push(:key)
        return :start_string, Fifo.empty
      end

      error('Expected whitespace or object key start `"`')
    end

    case ch
    when WS
      return :value_sep, Fifo.empty
    else
      return start_value(ch)
    end

  when :end_document
    error('Unexpected data') unless ch =~ WS

    # Trailing whitespace is accepted; still honour the (state, events)
    # contract instead of returning nil.
    return :end_document, Fifo.empty

  else
    error("Unexpected parser state #{state.inspect}")
  end
end

# Emit the event for a number that +ch+ has just terminated, then process
# +ch+ itself as the character following a value.
#
# value - The Numeric already converted from @value_buffer.
# ch    - The String character that ended the number.
#
# Returns a tuple of (Symbol next state, Fifo of events).
def finish_number(value, ch)
  events = Fifo.pure(end_value(value))
  @value_buffer.clear

  # The terminator is not part of the number, so run it through :end_value.
  state, new_events = handle_character(:end_value, ch)
  events.prepend!(new_events)

  return state, events
end
keyword(word, value, re, ch) click to toggle source

Parse one of the three allowed keywords: true, false, null.

word - The String keyword ('true', 'false', 'null').

value - The Ruby value (true, false, nil).

re - The Regexp of allowed keyword characters.

ch - The current String character being parsed.

Raises a JsonProjection::ParseError if the character does not belong in the expected keyword.

Returns nil while the keyword is still incomplete, or a tuple of (Symbol, JsonProjection::StreamEvent) once the whole keyword has been read.

# File lib/json-projection/parser.rb, line 520
# Consume one more character of a `true`, `false` or `null` literal.
#
# word  - The String keyword being read ('true', 'false', 'null').
# value - The Ruby value the keyword stands for (true, false, nil).
# re    - The Regexp of characters allowed after the keyword's first letter.
# ch    - The String character to consume.
#
# Returns nil while @value_buffer is still shorter than +word+, or the tuple
# (:end_value, event) once it spells +word+ exactly.
# Raises ParseError (via #error) when +ch+ is not allowed or the completed
# buffer does not equal +word+.
def keyword(word, value, re, ch)
  error("Expected #{word} keyword") unless ch =~ re
  @value_buffer << ch

  # Not enough characters yet; the caller stays in the same state.
  return if @value_buffer.size != word.size

  # Right length but wrong spelling (e.g. "trrr").
  error("Expected #{word} keyword") unless @value_buffer == word

  event = end_value(value)
  @value_buffer.clear
  return :end_value, event
end
start_value(ch) click to toggle source

Process the first character of one of the seven possible JSON values: object, array, string, true, false, null, number.

ch

String The current character String.

Returns a tuple of (Symbol, Fifo<Event>) or raises a JsonProjection::ParseError if the character cannot start a value.

# File lib/json-projection/parser.rb, line 546
# Dispatch on the first character of a JSON value.
#
# ch - The String character that should begin an object, array, string,
#      keyword or number.
#
# Containers and strings push a marker onto @stack; keywords and numbers
# start accumulating in @value_buffer.
#
# Returns a tuple of (Symbol next state, Fifo of events).
# Raises ParseError (via #error) when +ch+ cannot start any value.
def start_value(ch)
  # Values tracked on the stack; containers announce themselves immediately.
  case ch
  when LEFT_BRACE
    @stack.push(:object)
    return :start_object, Fifo.pure(StartObject.empty)
  when LEFT_BRACKET
    @stack.push(:array)
    return :start_array, Fifo.pure(StartArray.empty)
  when QUOTE
    @stack.push(:string)
    return :start_string, Fifo.empty
  end

  # Scalars whose text is collected in the value buffer, first character
  # included.
  scalar_state =
    case ch
    when T         then :start_true
    when F         then :start_false
    when N         then :start_null
    when MINUS     then :start_negative_number
    when ZERO      then :start_zero
    when DIGIT_1_9 then :start_int
    end
  error('Expected value') if scalar_state.nil?

  @value_buffer << ch
  return scalar_state, Fifo.empty
end
stream() click to toggle source
# File lib/json-projection/parser.rb, line 136
# Internal reader for the stream handed to #initialize; used by #next_event.
def stream; @stream; end