class JsonProjection::Parser
A streaming JSON parser that generates SAX-like events for state changes. Use the json gem for small documents. Use this for huge documents that won't fit in memory.
Constants
- BACKSLASH
- COLON
- COMMA
- CONTROL
- DIGIT
- DIGIT_1_9
- DIGIT_END
- EXPONENT
- FALSE_KEYWORD
- FALSE_RE
- HEX
- LEFT_BRACE
- LEFT_BRACKET
- MINUS
- NULL_KEYWORD
- NULL_RE
- PLUS
- POINT
- QUOTE
- RIGHT_BRACE
- RIGHT_BRACKET
- SLASH
- TRUE_KEYWORD
- TRUE_RE
- U
- WS
- ZERO
Public Class Methods
Initialize a new parser with a stream. The stream cursor is advanced as events are drawn from the parser. The parser maintains a small data cache of bytes read from the stream.
- stream - IO stream to read data from.
- chunk_size - Integer number of bytes to read from the stream at a time (default: 4096).
Returns nothing.
# File lib/json-projection/parser.rb, line 76
# Set up a parser that pulls its input from the given stream.
#
# stream     - The object data is read from; next_event calls
#              `read(chunk_size)` on it.
# chunk_size - The Integer handed to every `read` call (default: 4096).
#
# Returns nothing.
def initialize(stream, chunk_size = 4096)
  # Where input comes from and how much is requested per read.
  @stream, @chunk_size = stream, chunk_size

  # Events produced but not yet handed to the caller.
  @event_buffer = Fifo.new

  # Character-level read state. No chunk has been read yet, so there are no
  # characters and both positions sit one step before the first character.
  @character_buffer = Buffer.new
  @characters = nil
  @characters_cursor = @stream_position = -1

  # State machine: current state plus the stack of open containers/markers.
  @state = :start_document
  @stack = []

  # Scratch strings for the value being assembled and for the hex digits of
  # a \uXXXX escape. They must stay two distinct String objects.
  @value_buffer = ""
  @unicode = ""
end
Public Instance Methods
Draw bytes from the stream until an event can be constructed. May raise IO errors.
Returns a JsonProjection::StreamEvent subclass or raises StandardError.
# File lib/json-projection/parser.rb, line 99
# Draw characters from the stream until an event can be returned.
#
# Events that were already produced by an earlier character are returned
# first. Otherwise characters are fed one at a time to handle_character
# until it yields at least one event.
#
# Returns the next event taken from the internal event buffer.
# Raises (via #error) when the document has already ended or when the
# stream reports EOF before an event could be completed. Errors raised by
# the stream's `read` propagate unchanged.
def next_event
  # Serve any event that is already buffered; the oldest one comes out first.
  event = @event_buffer.pop!
  return event unless event.nil?

  error("already EOF, no more events") if @state == :end_document

  loop do
    # Refill until at least one character is available. This is a loop with
    # `>=` rather than a single `if ... ==` check: a chunk may produce zero
    # complete characters (NOTE(review): presumably the character buffer
    # holds back a split multi-byte sequence -- confirm against Buffer), and
    # indexing an empty array would feed nil to the state machine and push
    # the cursor past the end so that no further chunk is ever read.
    # NOTE: a stream whose `read` keeps returning data without ever
    # completing a character (e.g. a chunk_size of 0) keeps this loop reading.
    while @characters.nil? || @characters_cursor >= @characters.size
      data = stream.read(@chunk_size)
      error("unexpected EOF") if data.nil? # nil from read means EOF

      @characters = @character_buffer.<<(data).each_char.to_a
      @characters_cursor = 0
    end

    character = @characters[@characters_cursor]
    @characters_cursor += 1
    @stream_position += 1 # only used for error messages

    new_state, new_events = handle_character(@state, character)
    @state = new_state
    @event_buffer.prepend!(new_events)

    event = @event_buffer.pop!
    return event unless event.nil?
  end
end
Private Instance Methods
Complete an object or array container value type.
type - The Symbol, :object or :array, of the expected type.
Raises a JsonProjection::ParseError if the expected container type
was not completed.
Returns a tuple of (Symbol, Fifo<Event>): the next parser state and the
events to emit for the closed container.
# File lib/json-projection/parser.rb, line 486
# Close the innermost open container.
#
# type - The Symbol, :object or :array, that is expected on top of the
#        stack.
#
# Returns a two-element tuple: the next state Symbol (:end_value, or
# :end_document once the stack is empty) and the Fifo of events to emit.
# Raises (via #error) when the popped stack entry is not `type`.
def end_container(type)
  events = Fifo.pure

  if @stack.pop == type
    # Pick the matching end event; any other type emits nothing here.
    closing = case type
              when :object then EndObject
              when :array then EndArray
              end
    events.push!(closing.empty) if closing
  else
    error("Expected end of #{type}")
  end

  # Still inside an outer container: the closed container counts as a value.
  return :end_value, events unless @stack.empty?

  # Nothing left open, so the document ends with this container.
  events.push!(EndDocument.empty)
  return :end_document, events
end
Construct the event for the value just read. This method does not itself change the parser state.
Returns a JsonProjection::StreamEvent
subclass instance, or nil if the value is of an unsupported type.
# File lib/json-projection/parser.rb, line 583
# Wrap a finished scalar in the event object that represents it.
#
# value - true/false, a Numeric, a String, or nil.
#
# Returns a Boolean, Number, JsonProjection::String or Null event, or nil
# when the value is none of the supported types.
def end_value(value)
  case value
  when true, false then Boolean.new(value)
  when Numeric     then Number.new(value)
  # `::String` is the core class; the event class is JsonProjection::String.
  when ::String    then JsonProjection::String.new(value)
  when nil         then Null.empty
  end
end
# File lib/json-projection/parser.rb, line 596
# Abort parsing with a message that includes the current stream position.
#
# message - The String describing what was expected.
#
# Raises ParseError; never returns normally.
def error(message)
  located = "#{message}: char #{@stream_position}"
  raise ParseError, located
end
Given a state and new character, return a new state and fifo of events to yield to pull callers.
- state
-
Symbol
- ch
Returns a tuple of (Symbol, Fifo
<Event>) or raises StandardError.
# File lib/json-projection/parser.rb, line 148
# Given a state and a new character, compute the next state and the events
# that the character produced.
#
# state - The Symbol naming the current parser state.
# ch    - The single-character String that was just read.
#
# Side effects: may push/pop @stack and append to or clear @value_buffer
# and @unicode.
#
# Returns a two-element tuple of (Symbol, Fifo) -- the next state and the
# events to hand to callers (possibly none).
# Raises (via #error) when the character is not valid in the given state.
def handle_character(state, ch)
  case state
  when :start_document
    case ch
    when WS
      return :start_document, Fifo.empty
    when LEFT_BRACE
      @stack.push(:object)
      # NOTE(review): argument order looks newest-first, i.e. StartDocument
      # is presumably delivered before StartObject -- confirm against Fifo.
      events = Fifo.pure(StartObject.empty, StartDocument.empty)
      return :start_object, events
    when LEFT_BRACKET
      @stack.push(:array)
      events = Fifo.pure(StartArray.empty, StartDocument.empty)
      return :start_array, events
    end
    error('Expected whitespace, object `{` or array `[` start token')
  when :start_object
    case ch
    when WS
      return :start_object, Fifo.empty
    when QUOTE
      @stack.push(:key)
      return :start_string, Fifo.empty
    when RIGHT_BRACE
      return end_container(:object)
    end
    error('Expected object key `"` start')
  when :start_string
    case ch
    when QUOTE
      # The marker pushed when the string was opened says whether this is a
      # value (:string) or an object key (:key).
      if @stack.pop == :string
        events = Fifo.pure(end_value(@value_buffer.dup))
        @value_buffer.clear
        return :end_value, events
      else # :key
        events = Fifo.pure(Key.new(@value_buffer.dup))
        @value_buffer.clear
        return :end_key, events
      end
    when BACKSLASH
      return :start_escape, Fifo.empty
    when CONTROL
      error('Control characters must be escaped')
    else
      @value_buffer << ch
      return :start_string, Fifo.empty
    end
  when :start_escape
    case ch
    when QUOTE, BACKSLASH, SLASH
      @value_buffer << ch
      return :start_string, Fifo.empty
    when B
      @value_buffer << "\b"
      return :start_string, Fifo.empty
    when F
      @value_buffer << "\f"
      return :start_string, Fifo.empty
    when N
      @value_buffer << "\n"
      return :start_string, Fifo.empty
    when R
      @value_buffer << "\r"
      return :start_string, Fifo.empty
    when T
      @value_buffer << "\t"
      return :start_string, Fifo.empty
    when U
      return :unicode_escape, Fifo.empty
    else
      error('Expected escaped character')
    end
  when :unicode_escape
    case ch
    when HEX
      @unicode << ch
      if @unicode.size == 4
        codepoint = @unicode.slice!(0, 4).hex
        # A pending high surrogate is kept on @stack as an Integer. The
        # checks use Integer, not Fixnum: Fixnum was deprecated in Ruby 2.4
        # and removed in 3.2, where referencing it raises NameError.
        if codepoint >= 0xD800 && codepoint <= 0xDBFF
          # High surrogate: it must not follow another high surrogate.
          error('Expected low surrogate pair half') if @stack[-1].is_a?(Integer)
          @stack.push(codepoint)
          return :start_surrogate_pair, Fifo.empty
        elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
          # Low surrogate: combine it with the pending high half.
          high = @stack.pop
          error('Expected high surrogate pair half') unless high.is_a?(Integer)
          pair = ((high - 0xD800) * 0x400) + (codepoint - 0xDC00) + 0x10000
          @value_buffer << pair
          return :start_string, Fifo.empty
        else
          # An ordinary code point may not follow a pending high surrogate.
          # Without this check the Integer would stay on @stack and be
          # popped in place of the :string/:key marker at the closing quote.
          error('Expected low surrogate pair half') if @stack[-1].is_a?(Integer)
          @value_buffer << codepoint
          return :start_string, Fifo.empty
        end
      end
      return :unicode_escape, Fifo.empty
    else
      error('Expected unicode escape hex digit')
    end
  when :start_surrogate_pair
    # After a high surrogate only another `\uXXXX` escape is acceptable.
    case ch
    when BACKSLASH
      return :start_surrogate_pair_u, Fifo.empty
    else
      error('Expected low surrogate pair half')
    end
  when :start_surrogate_pair_u
    case ch
    when U
      return :unicode_escape, Fifo.empty
    else
      error('Expected low surrogate pair half')
    end
  when :start_negative_number
    case ch
    when ZERO
      @value_buffer << ch
      return :start_zero, Fifo.empty
    when DIGIT_1_9
      @value_buffer << ch
      return :start_int, Fifo.empty
    else
      error('Expected 0-9 digit')
    end
  when :start_zero
    case ch
    when POINT
      @value_buffer << ch
      return :start_float, Fifo.empty
    when EXPONENT
      @value_buffer << ch
      return :start_exponent, Fifo.empty
    else
      # Numbers have no terminator of their own: emit the number, then
      # process the same character again as whatever follows a value.
      events = Fifo.pure(end_value(@value_buffer.to_i))
      @value_buffer.clear
      state = :end_value
      state, new_events = handle_character(state, ch)
      events.prepend!(new_events)
      return state, events
    end
  when :start_float
    case ch
    when DIGIT
      @value_buffer << ch
      return :in_float, Fifo.empty
    end
    error('Expected 0-9 digit')
  when :in_float
    case ch
    when DIGIT
      @value_buffer << ch
      return :in_float, Fifo.empty
    when EXPONENT
      @value_buffer << ch
      return :start_exponent, Fifo.empty
    else
      events = Fifo.pure(end_value(@value_buffer.to_f))
      @value_buffer.clear
      state = :end_value
      state, new_events = handle_character(state, ch)
      events.prepend!(new_events)
      return state, events
    end
  when :start_exponent
    case ch
    when MINUS, PLUS, DIGIT
      @value_buffer << ch
      return :in_exponent, Fifo.empty
    end
    error('Expected +, -, or 0-9 digit')
  when :in_exponent
    case ch
    when DIGIT
      @value_buffer << ch
      return :in_exponent, Fifo.empty
    else
      # A sign alone (e.g. "1e+") is not a complete exponent.
      error('Expected 0-9 digit') unless @value_buffer =~ DIGIT_END
      events = Fifo.pure(end_value(@value_buffer.to_f))
      @value_buffer.clear
      state = :end_value
      state, new_events = handle_character(state, ch)
      events.prepend!(new_events)
      return state, events
    end
  when :start_int
    case ch
    when DIGIT
      @value_buffer << ch
      return :start_int, Fifo.empty
    when POINT
      @value_buffer << ch
      return :start_float, Fifo.empty
    when EXPONENT
      @value_buffer << ch
      return :start_exponent, Fifo.empty
    else
      events = Fifo.pure(end_value(@value_buffer.to_i))
      @value_buffer.clear
      state = :end_value
      state, new_events = handle_character(state, ch)
      events.prepend!(new_events)
      return state, events
    end
  when :start_true
    # keyword returns nil until the whole word has been read.
    state, event = keyword(TRUE_KEYWORD, true, TRUE_RE, ch)
    if state.nil?
      return :start_true, Fifo.empty
    end
    return state, Fifo.pure(event)
  when :start_false
    state, event = keyword(FALSE_KEYWORD, false, FALSE_RE, ch)
    if state.nil?
      return :start_false, Fifo.empty
    end
    return state, Fifo.pure(event)
  when :start_null
    state, event = keyword(NULL_KEYWORD, nil, NULL_RE, ch)
    if state.nil?
      return :start_null, Fifo.empty
    end
    return state, Fifo.pure(event)
  when :end_key
    case ch
    when WS
      return :end_key, Fifo.empty
    when COLON
      return :key_sep, Fifo.empty
    end
    error('Expected colon key separator')
  when :key_sep
    case ch
    when WS
      return :key_sep, Fifo.empty
    else
      return start_value(ch)
    end
  when :start_array
    case ch
    when WS
      return :start_array, Fifo.empty
    when RIGHT_BRACKET
      return end_container(:array)
    else
      return start_value(ch)
    end
  when :end_value
    case ch
    when WS
      return :end_value, Fifo.empty
    when COMMA
      return :value_sep, Fifo.empty
    when RIGHT_BRACE
      return end_container(:object)
    when RIGHT_BRACKET
      return end_container(:array)
    end
    error('Expected comma `,` object `}` or array `]` close')
  when :value_sep
    # Inside an object a comma must be followed by a key, not a bare value.
    if @stack[-1] == :object
      case ch
      when WS
        return :value_sep, Fifo.empty
      when QUOTE
        @stack.push(:key)
        return :start_string, Fifo.empty
      end
      error('Expected whitespace or object key start `"`')
    end
    case ch
    when WS
      return :value_sep, Fifo.empty
    else
      return start_value(ch)
    end
  when :end_document
    error('Unexpected data') unless ch =~ WS
    # Trailing whitespace is fine; keep honouring the (state, events)
    # contract instead of falling through and returning nil.
    return :end_document, Fifo.empty
  end
end
Parse one of the three allowed keywords: true, false, null.
word - The String keyword ('true', 'false', 'null').
value - The Ruby value (true, false, nil).
re - The Regexp of allowed keyword characters.
ch - The current String character being parsed.
Raises a JsonProjection::ParseError if the character does not belong
in the expected keyword.
Returns nil while the keyword is still incomplete, or a tuple of
(Symbol, JsonProjection::StreamEvent) once the whole keyword has been read.
# File lib/json-projection/parser.rb, line 520
# Consume one character of a literal keyword (true, false or null).
#
# word  - The String keyword being matched.
# value - The Ruby value the finished keyword stands for.
# re    - The Regexp a character of this keyword must match.
# ch    - The String character that was just read.
#
# Returns nil while the buffered text is shorter than `word`; returns the
# tuple (:end_value, event) once the buffer equals `word`.
# Raises (via #error) when the character or the completed text does not
# fit the keyword.
def keyword(word, value, re, ch)
  complaint = "Expected #{word} keyword"

  if ch =~ re
    @value_buffer << ch
  else
    error(complaint)
  end

  # Not enough characters yet: the caller stays in the same state.
  return nil unless @value_buffer.size == word.size

  # Right length but wrong letters (the Regexp only checks membership).
  return error(complaint) unless @value_buffer == word

  event = end_value(value)
  @value_buffer.clear
  return :end_value, event
end
Process the first character of one of the seven possible JSON values: object, array, string, true, false, null, number.
Returns a tuple of (Symbol, Fifo<Event>), or raises a JsonProjection::ParseError
if the character cannot start a value.
# File lib/json-projection/parser.rb, line 546
# Dispatch on the first character of a JSON value.
#
# ch - The String character that was just read.
#
# Returns a two-element tuple: the next state Symbol and the Fifo of events
# to emit (a start event for containers, otherwise none).
# Raises (via #error) when the character cannot begin a value.
def start_value(ch)
  # Containers and strings record a marker on the stack and buffer nothing.
  case ch
  when LEFT_BRACE
    @stack.push(:object)
    return :start_object, Fifo.pure(StartObject.empty)
  when LEFT_BRACKET
    @stack.push(:array)
    return :start_array, Fifo.pure(StartArray.empty)
  when QUOTE
    @stack.push(:string)
    return :start_string, Fifo.empty
  end

  # Keywords and numbers all keep their first character in the value
  # buffer; they differ only in the state they move to.
  scalar_state = case ch
                 when T         then :start_true
                 when F         then :start_false
                 when N         then :start_null
                 when MINUS     then :start_negative_number
                 when ZERO      then :start_zero
                 when DIGIT_1_9 then :start_int
                 end
  return error('Expected value') unless scalar_state

  @value_buffer << ch
  return scalar_state, Fifo.empty
end
# File lib/json-projection/parser.rb, line 136
# Reader for the @stream instance variable.
#
# Returns whatever object is currently stored in @stream.
def stream
  @stream
end