class Feedtxt::JsonParser
Constants
- FEED_BEGIN
note:
regex escape: a pipe | must be written as \|, and a backslash needs to get escaped twice (\\ becomes \); matches e.g. |{ or |{{{
- FEED_END
- FEED_META
e.g. }—{ or }}}—{{{ or }-{
todo/check: also allow }.{ with dot why? why not? also allow } - { or } ---- { why? why not?
- FEED_NEXT
e.g. }/{ or }}}/{{{
todo/check: also allow }///{ or } /// { why? why not?
Public Class Methods
new( text )
click to toggle source
Note: let's keep/use the same API as RSS::Parser for now
# File lib/feedtxt/parser/json.rb, line 17
##
# Builds a parser around the raw feed text; the text is only stored
# here and is not examined until #parse is called.
#
# text - the feed source to parse later
def initialize( text )
  @text = text
end
parse( text, opts={} )
click to toggle source
convenience class/factory method
# File lib/feedtxt/parser/json.rb, line 12
##
# Factory shortcut: wraps text in a new parser instance and returns
# the result of its #parse call.
#
# text - the feed source to parse
# opts - accepted but currently not used by this method
def self.parse( text, opts={} )
  parser = new( text )
  parser.parse
end
Public Instance Methods
parse()
click to toggle source
# File lib/feedtxt/parser/json.rb, line 42
##
# Parses @text into feed metadata plus a list of items.
#
# The text between the FEED_BEGIN and FEED_END markers is split on
# FEED_NEXT. The first block is the feed metadata; every following block
# is one item made of metadata, a FEED_META separator and free-form content.
# Each metadata section is wrapped in "{ ... }" and parsed as JSON.
#
# Returns [feed_metadata, feed_items] where feed_items is an array of
# [item_metadata, item_content] pairs, or [] (after printing a warning)
# when the begin or end marker cannot be found.
#
# Raises JSON::ParserError when a metadata section is not valid JSON.
def parse
  s = StringScanner.new( @text )

  ## skip everything before the begin marker (prolog is not used)
  s.scan_until( /(?=#{FEED_BEGIN})/ )

  ## note: StringScanner#scan returns nil (not "") when nothing matches
  feed_begin = s.scan( /#{FEED_BEGIN}/ )
  if feed_begin.nil? || feed_begin.empty?
    ## nothing found return empty array for now; return nil - why? why not?
    ## NOTE(review): the marker shown in the message looks stale for the
    ##   JSON flavor - confirm before changing the text
    puts "warn !!! no begin marker found e.g. |>>>"
    return []
  end

  ## scan_until returns nil too if there is no end marker ahead,
  ##  so check before touching buf
  buf      = s.scan_until( /(?=#{FEED_END})/ )
  feed_end = s.scan( /#{FEED_END}/ )
  if buf.nil? || feed_end.nil? || feed_end.empty?
    ## nothing found return empty array for now; return nil - why? why not?
    puts "warn !!! no end marker found e.g. <<<|"
    return []
  end

  ####
  ## pass 1: split blocks by }/{
  ### note: allows }}}/{{{
  blocks = buf.strip.split( /#{FEED_NEXT}/ )

  ## 1st block is feed meta data
  ##  note: an empty feed body yields no blocks at all (shift returns nil);
  ##        to_s turns that into empty metadata ({})
  block1st      = blocks.shift.to_s.strip
  feed_metadata = ::JSON.parse( "{ #{block1st} }" )

  feed_items = blocks.map do |block|
    ### note: do NOT use split - the separator characters may show up
    ##    again inside the item content (e.g. --- is used by markdown);
    ##    only the first separator splits, all others get ignored
    s2 = StringScanner.new( block )

    raw_metadata = s2.scan_until( /(?=#{FEED_META})/ )
    if raw_metadata
      s2.scan( /#{FEED_META}/ )   ## step over the separator itself
      item_content = s2.rest.strip
    else
      ## no separator: treat the whole block as metadata without content
      raw_metadata = block
      item_content = ''
    end

    item_metadata = ::JSON.parse( "{ #{raw_metadata.strip} }" )

    [item_metadata, item_content]
  end

  [ feed_metadata, feed_items ]
end