class Values::Parser
Constants
- Token
Public Class Methods
new()
click to toggle source
# File lib/csv11.rb, line 38 def initialize end
Public Instance Methods
match_quoted?()
click to toggle source
todo/fix: use @buffer.match - “ (next letter MUST Not be ”!! e.g.“” not valid!!!
# File lib/csv11.rb, line 114 def match_quoted? @buffer.peek(1) == %{"} || ## double quote @buffer.peek(1) == %{'} ## single quote end
match_triple_quoted?()
click to toggle source
# File lib/csv11.rb, line 107 def match_triple_quoted? ## todo/fix: use @buffer.match - """ (next letter MUST Not be "!! e.g.""""" not valid!!! @buffer.peek(3) == %{"""} || ## double triple quotes @buffer.peek(3) == %{'''} ## single triple quotes end
parse(str)
click to toggle source
# File lib/csv11.rb, line 234 def parse(str) puts "" puts "**** parse >>#{str}<<" @values = [] @buffer = StringScanner.new(str) parse_line @values end
parse_line()
click to toggle source
how to handle:
country:dk mailto:hello ## use excluded (reserved names urn, mailto, http, https) http://example.com urn:hello:444 name:"hello, world!"
# File lib/csv11.rb, line 50 def parse_line loop do t = parse_token if t.nil? puts "!! format error: expected token with value, returns nil - rest is >>#{@buffer.rest}<<" break end if @buffer.peek(1) == ',' @buffer.getch ## consume ',' puts "adding value >>#{t.value}<<" @values << t.value elsif @buffer.eos? puts "adding (last) value >>#{t.value}<<" @values << t.value break elsif @buffer.peek(1) == ':' @buffer.getch ## consume ':' ## todo/fix: ## do NOT allow names for quoted, triple_quoted for now - why? why not? ## issue a format error: why? why not?? if @values.empty? ### note:special case: ## if first token is a name/key ## consume all the rest!! including commas, colons etc. ## no escape needed for nothing value = @buffer.rest value = value.strip puts "adding (single-line) first named value >>#{t.value}<< : >>#{value}<<" @values << [t.value,value] break else t2 = parse_token( match_name: false ) puts "adding named value >>#{t.value}<< : >>#{t2.value}<<" @values << [t.value,t2.value] if @buffer.peek(1) == ',' @buffer.getch ## consume ',' elsif @buffer.eos? break else puts "!! format error: expected comma (,) or EOS - rest is >>#{@buffer.rest}<<" break end end else puts "!! format error: expected comma (,) or colon (:) or EOS - rest is >>#{@buffer.rest}<<" break end end end
parse_quoted()
click to toggle source
# File lib/csv11.rb, line 152 def parse_quoted token = nil # nothing found if @buffer.peek(1) == '"' ## double quote @buffer.getch # consume double quote value = @buffer.scan_until( /(?=")/) @buffer.getch # consume double quote @buffer.skip( /[ \t]*/ ) ## skip trailing WHITESPACE puts %{quoted "..." value >>#{value}<<} token = Quoted.new( value ) elsif @buffer.peek(1) == "'" ## single quote @buffer.getch # consume single quote value = @buffer.scan_until( /(?=')/) @buffer.getch # consume single quote @buffer.skip( /[ \t]*/ ) ## skip trailing WHITESPACE puts %{quoted '...' value >>#{value}<<} token = Quoted.new( value ) else ## do nothing; report format error end token end
parse_token( match_name: true )
click to toggle source
# File lib/csv11.rb, line 217 def parse_token( match_name: true ) @buffer.skip( /[ \t]*/ ) ## skip WHITESPACE token = nil # nothing found if match_triple_quoted? # """...""" or '''...''' token = parse_triple_quoted elsif match_quoted? # "..." or '...' token = parse_quoted else token = parse_unquoted( match_name: match_name ) end token end
parse_triple_quoted()
click to toggle source
# File lib/csv11.rb, line 120 def parse_triple_quoted token = nil # nothing found if @buffer.peek(3) == %{"""} ## double quote @buffer.getch # consume double quote @buffer.getch @buffer.getch value = @buffer.scan_until( /(?=""")/) @buffer.getch # consume double quote @buffer.getch @buffer.getch @buffer.skip( /[ \t]*/ ) ## skip trailing WHITESPACE puts %{quoted """...""" value >>#{value}<<} token = TripleQuoted.new( value ) elsif @buffer.peek(3) == %{'''} ## single quote @buffer.getch # consume single quote @buffer.getch @buffer.getch value = @buffer.scan_until( /(?=''')/) @buffer.getch # consume single quote @buffer.getch @buffer.getch @buffer.skip( /[ \t]*/ ) ## skip trailing WHITESPACE puts %{quoted '''...''' value >>#{value}<<} token = TripleQuoted.new( value ) else ## do nothing; report format error end token end
parse_unquoted( match_name: true )
click to toggle source
# File lib/csv11.rb, line 177 def parse_unquoted( match_name: true ) ## unquoted value puts "collect unquoted token (match_name? => #{match_name}) - rest: >>#{@buffer.rest}<<" if match_name value = @buffer.scan_until( /(?=[,:]|$)/) ## check for reserverd "non-keys" e.g.: ### https: http: ### urn: ### mailto: ### file: ### add some more?? ## todo/fix: add ip address e.g. 127.0.0.1: too ?? reserved_names = %w{ https http urn mailto file } name_regex = %r{^[a-zA-Z0-9._-]+$} ## todo/fix: allow more chars if @buffer.peek(1) == ':' if reserved_names.include?( value ) ## continue scan until next comma or eos(end-of-string)!!! value << @buffer.scan_until( /(?=,|$)/) elsif name_regex.match( value ).nil? ## does NOT match name/key pattern ## continue scan until next comma or eos(end-of-string)!!! value << @buffer.scan_until( /(?=,|$)/) else ## continue end end else ## do NOT match name (named values) e.g. do NOT include colon (:) value = @buffer.scan_until( /(?=[,]|$)/) end value = value.rstrip ## right strip whitespace puts "value >>#{value}<<" token = Unquoted.new( value ) end