class Rumba::Crawler::Parser
Constants
- SK
Service Keys
Public Instance Methods
create_object(name, node, template)
click to toggle source
# File lib/rumba/crawler/parser.rb, line 37
#
# Builds one result object from +node+ as described by +template+.
#
# The object is obtained by calling the zero-argument method named +name+ on
# this parser. Every template entry whose key is not listed in SK is then
# assigned through the object's "<key>=" writer:
#
# * an Array value is resolved with parse_multi, using the array's first
#   element as the sub-template;
# * any other value is resolved with parse_node.
#
# name::     name of the method that returns the object to fill in
# node::     node passed on to parse_multi / parse_node
# template:: Hash of keys to sub-templates; keys found in SK are skipped
#
# Returns the populated object. Raises NoMethodError when the object has no
# public writer for a template key.
def create_object(name, node, template)
  # NOTE(review): +name+ is taken from the template and +send+ also reaches
  # private methods. It is left as +send+ because the factory methods are not
  # visible from here and may be private -- confirm templates are trusted.
  object = send(name)

  template.each do |key, value|
    next if SK.include?(key)

    parsed = value.is_a?(Array) ? parse_multi(node, value.first) : parse_node(node, value, key)
    # public_send keeps template keys from invoking private writers.
    object.public_send("#{key}=", parsed)
  end

  object
end
get_content(node, template)
click to toggle source
# File lib/rumba/crawler/parser.rb, line 49
#
# Returns the text content of +node+, optionally narrowed by a regexp.
#
# When +template+ is a Hash with a 'regexp' entry, that entry is compiled as
# a case-insensitive pattern and the first match inside the node's content is
# returned ("" when nothing matches). Otherwise the whole content is returned.
#
# node::     object responding to +content+, or nil
# template:: Hash of options, or a String (bare selector, no options)
#
# Returns a String, or nil when +node+ is nil.
def get_content(node, template)
  # Nothing was selected, so there is no content to extract.
  return unless node

  text = node.content
  # Only Hash templates carry options. On a String template, ['regexp'] would
  # be a substring search, so a selector such as "div.regexp-box" used to
  # switch regexp mode on by accident.
  pattern = template['regexp'] if template.is_a?(Hash)
  return text unless pattern

  /#{pattern}/i.match(text).to_s
end
get_node(doc, template)
click to toggle source
# File lib/rumba/crawler/parser.rb, line 57
#
# Selects the nodes that +template+ points at.
#
# * String template: used directly as a CSS selector on +doc+.
# * Template with a 'root' entry: the 'root' selector is applied to @doc
#   (not to +doc+), and the 'css' selector is applied to that result.
# * Any other template: the 'css' selector is applied to +doc+.
#
# doc::      object responding to +css+
# template:: String selector, or a Hash-like with 'css' and optional 'root'
#
# Returns whatever +css+ returns for the final selector.
def get_node(doc, template)
  return doc.css(template) if template.is_a?(String)

  root_selector = template['root']
  # A 'root' entry restarts the search from the stored document.
  scope = root_selector ? @doc.css(root_selector) : doc
  scope.css(template['css'])
end
leaf_node?(template)
click to toggle source
# File lib/rumba/crawler/parser.rb, line 67
#
# Tells whether +template+ describes a plain value rather than a nested
# object: true for a String, and true for a collection whose keys are all
# listed in SK (including an empty one).
#
# template:: String or Hash-like template
def leaf_node?(template)
  return true if template.is_a?(String)

  # Any key outside SK means there is something left to build an object from.
  template.all? { |key, _| SK.include?(key) }
end
parse_multi(doc, template)
click to toggle source
# File lib/rumba/crawler/parser.rb, line 18
#
# Builds one object per matching node for every entry of +template+.
#
# For each key/value pair, get_node(doc, value) supplies the nodes and
# create_object(key, node, value) turns each node into an object.
#
# doc::      node or document passed to get_node
# template:: Hash of object names to their sub-templates
#
# Returns a flat Array of the created objects, ordered by template entry and
# then by node; empty when nothing matches.
def parse_multi(doc, template)
  template.flat_map do |key, value|
    get_node(doc, value).map { |node| create_object(key, node, value) }
  end
end
parse_node(doc, template, name)
click to toggle source
# File lib/rumba/crawler/parser.rb, line 28
#
# Resolves a single template entry against +doc+.
#
# Takes the first result of get_node(doc, template). For a leaf template
# (see leaf_node?) the result of get_content is returned; otherwise an object
# is built with create_object under the given +name+.
#
# doc::      node or document passed to get_node
# template:: String selector or Hash template
# name::     object name handed to create_object for non-leaf templates
def parse_node(doc, template, name)
  first_match = get_node(doc, template).first
  return get_content(first_match, template) if leaf_node?(template)

  create_object(name, first_match, template)
end
process(response, template)
click to toggle source
# File lib/rumba/crawler/parser.rb, line 8
#
# Entry point: parses +response+ as HTML and extracts data as described by
# the JSON +template+.
#
# The parsed document is stored in @doc. A template whose JSON is an Array is
# handled by parse_multi using the array's first element; any other template
# is handled by parse_node, named after its first key.
#
# response:: HTML source passed to Nokogiri::HTML
# template:: JSON text describing what to extract
#
# Returns the result of parse_multi or parse_node.
def process(response, template)
  spec = JSON.parse(template)
  @doc = Nokogiri::HTML(response)

  return parse_multi(@doc, spec.first) if spec.is_a?(Array)

  parse_node(@doc, spec, spec.keys.first)
end