class Mushy::ParseHtml
Public Class Methods
details()
click to toggle source
# File lib/mushy/fluxs/parse_html.rb, line 7
# Describes this flux: its name, a short description, and the two
# configuration options it declares (:path and :extract) together with
# their types and default values.
#
# @return [Hash] the static description of the ParseHtml flux
def self.details
  # Default location of the HTML inside the incoming event.
  path_option = {
    description: 'The path to the HTML in the incoming event.',
    type: 'text',
    value: 'body'
  }

  # Default extraction map: output key => "css|xpath" expression.
  extract_option = {
    description: 'The form of the event that is meant to be pulled from this event.',
    type: 'keyvalue',
    value: { url: 'a|@href' }
  }

  {
    name: 'ParseHtml',
    description: 'Parses HTML.',
    config: { path: path_option, extract: extract_option }
  }
end
Public Instance Methods
process(event, config)
click to toggle source
# File lib/mushy/fluxs/parse_html.rb, line 26
# Parses the HTML stored at event[config[:path]] and turns it into a list of
# records, one per node matched by the first extract rule.
#
# Every value in config[:extract] is a string of the form "css|xpath". The CSS
# selector picks nodes from the document; the XPath part (defaulting to
# './node()' when omitted) is evaluated against each picked node and the
# result is stringified.
#
# The number of records equals the number of matches for the FIRST extract
# key. Other keys are lined up by position; where a key has fewer matches its
# value in that record is nil. String values are stripped of surrounding
# whitespace.
#
# @param event [#[]] the incoming event; presumably a Hash-like object — verify against callers
# @param config [#[]] must provide :path and :extract (a key => "css|xpath" map)
# @return [Array<SymbolizedHash>] one record per match; [] when :extract is empty
def process(event, config)
  extract = config[:extract]

  # With no extract rules there is no "first key" to size the result by;
  # previously this fell through to nil.each_with_index and raised.
  return [] if extract.empty?

  doc = Nokogiri::HTML(event[config[:path]])

  # Column-oriented view: extract key => array of extracted strings.
  columns = extract.keys.each_with_object({}) do |key, found|
    css, xpath = extract[key].split('|')
    xpath ||= './node()'
    found[key] = doc.css(css).map { |node| node.xpath(xpath).to_s }
  end

  # Pivot the columns into row records, sized by the first key's matches.
  columns[columns.keys.first].each_index.map do |i|
    columns.keys.each_with_object(SymbolizedHash.new({})) do |key, record|
      record[key] = columns[key][i]&.strip
    end
  end
end