class TheScrap::Scrap
Attributes
base_url[RW]
data_proc[RW]
debug[RW]
debug?[RW]
detail_info[RW]
encoding[RW]
html_proc[RW]
item_frag[RW]
result_proc[RW]
url[RW]
verbose[RW]
verbose?[RW]
Public Class Methods
new()
click to toggle source
# File lib/the_scrap/scrap.rb, line 26
# Sets up an empty scraper configuration.
#
# @attrs holds the attribute rules registered through the dynamic
# attr_<name>= setters; the *_proc / detail_info collections start out as
# independent empty arrays so callers can append to them.
def initialize
  @attrs = {}
  @more_info = []

  # Both flags are exposed as predicates (debug? / verbose?); start them as
  # real booleans so neither predicate yields nil.
  @debug = false
  @verbose = false

  #@encoding = 'utf-8'
  @result_proc = []
  @detail_info = []
  @data_proc = []
  @html_proc = []
end
Public Instance Methods
method_missing( method_id, *arguments, &block )
click to toggle source
# File lib/the_scrap/scrap.rb, line 54
# Implements the dynamic attribute setters: calling attr_<name>= stores the
# given value in @attrs under the string key "<name>".
#
# Any other unknown method is passed on to super, so a misspelled call
# raises NoMethodError instead of silently returning nil.
#
# method_id - Symbol name of the method that was called.
# arguments - call arguments; only the first one is stored.
# block     - unused by the setters, forwarded to super.
def method_missing( method_id, *arguments, &block )
  # Anchored so only names that start with "attr_" and end with "=" match.
  setter = /\Aattr_(.*)=\z/.match(method_id.to_s)
  return super unless setter

  @attrs[setter[1]] = arguments.first
end

# Keeps respond_to? truthful for the dynamic attr_<name>= setters.
def respond_to_missing?( method_id, include_private = false )
  (method_id.to_s =~ /\Aattr_.*=\z/) ? true : super
end
retryable( options = {} ) { || ... }
click to toggle source
# File lib/the_scrap/scrap.rb, line 37
# Runs the given block and returns its value, re-running it when it raises
# one of the configured exceptions.
#
# options - Hash of settings:
#           :tries - total number of attempts (default 1, i.e. no retry).
#           :on    - exception class, or array of classes, that triggers a
#                    retry (default StandardError).
#           :sleep - seconds to wait between attempts (default 2).
#
# Once the attempts are used up the last exception is re-raised unchanged.
# Exceptions that do not match :on propagate immediately.
def retryable( options = {} )
  # StandardError rather than Exception: with Exception as the default,
  # Interrupt and SystemExit would be retried whenever :tries > 1.
  opts = { :tries => 1, :on => StandardError, :sleep => 2 }.merge(options)
  attempts_left = opts[:tries]
  pause = opts[:sleep]

  begin
    yield
  rescue *Array(opts[:on])
    attempts_left -= 1
    raise unless attempts_left > 0

    sleep pause if pause && pause > 0
    retry
  end
end
Protected Instance Methods
get_attrs( url, doc, item_info )
click to toggle source
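Fills item_info with one value per configured attribute. A plain CSS selector stores the stripped text of the matching nodes. An array rule [selector, mode] stores the first match's inner HTML (:inner_html), the texts of all matches joined with commas (:join) or as an array (:array), or the named attribute of the first match; href and src values are resolved against base_url (or url when base_url is not set). A rule of the form [:frag_attr, name] reads the attribute from doc itself.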
# File lib/the_scrap/scrap.rb, line 63
# Applies every rule in @attrs to doc and stores the results in item_info
# under the rule's key.
#
# url       - fallback base for resolving href/src values when base_url is
#             not set.
# doc       - object answering css(selector) and [](attribute_name)
#             (presumably a Nokogiri node; verify against callers).
# item_info - Hash-like object that receives the extracted values.
#
# Rule forms:
#   "selector"               - stripped text of all matches
#   [:frag_attr, name]       - attribute +name+ of doc itself
#   [selector, :inner_html]  - inner HTML of the first match
#   [selector, :join]        - texts of all matches joined with ","
#   [selector, :array]       - texts of all matches as an array
#   [selector, attr_name]    - stripped attribute of the first match;
#                              href/src are made absolute
#   [selector]               - treated like the plain "selector" form
#
# A key is left untouched when an array rule's selector matches nothing or
# when the requested attribute is absent on the matched node.
#
# Returns the array of attribute keys that were processed.
def get_attrs( url, doc, item_info )
  @attrs.keys.each do |key|
    rule = @attrs[key]

    unless rule.is_a? Array
      item_info[key] = doc.css(rule).text.strip
      next
    end

    selector, mode = rule

    if selector == :frag_attr
      item_info[key] = doc[mode]
      next
    end

    matches = doc.css(selector)
    node = matches.first
    next unless node

    case mode
    when nil
      # One-element rule: no mode to dispatch on, fall back to text.
      item_info[key] = matches.text.strip
    when :inner_html
      item_info[key] = node.inner_html
    when :join
      item_info[key] = matches.map { |match| match.text }.join(',')
    when :array
      item_info[key] = matches.map { |match| match.text }
    else
      raw = node[mode]
      # Attribute not present on the node: skip rather than call strip on nil.
      next if raw.nil?

      value = raw.strip
      if [:href, :src].include? mode.to_sym
        # Spaces are percent-encoded before the value is handed to URI.join.
        link = value.gsub(" ", "%20")
        begin
          item_info[key] = URI.join(base_url || url, link).to_s
        rescue StandardError
          # Best effort: keep the unresolved link when joining fails.
          item_info[key] = link.to_s
        end
      else
        item_info[key] = value
      end
    end
  end
end