class Query::Result::Baidu
Public Instance Methods
ads_bottom()
click to toggle source
# File lib/query/result/baidu.rb, line 27
# Collects the entries below the organic results in the left-hand column
# (presumably the bottom ad slots -- inferred from the method name).
#
# Picks the children of div#content_left that are neither result blocks nor
# one of the excluded widgets (leftBlock, <br>, rs_top_new, super_se_tip, rs)
# and whose position() is 11 or more, then runs each through parse_ad.
#
# Returns an Array of the Hashes produced by parse_ad, each merged with a
# :rank of (index + 1) offset by 10 for every page before @pagenumber.
def ads_bottom
  bottom_xpath = "//div[@id='content_left']/*[not(contains(@class, 'result') or contains(@class, 'leftBlock') or name()='br' or @id='rs_top_new' or @id='super_se_tip' or @class='rs') and position()>=11]"
  candidates = @page.search(bottom_xpath)
  candidates.each_with_index.map do |node, i|
    ad = parse_ad(node)
    ad.merge(:rank => (i + 1) + (@pagenumber - 1) * 10)
  end
end
ads_right()
click to toggle source
# File lib/query/result/baidu.rb, line 33
# Collects the entries shown in the div#ec_im_container block (presumably the
# right-hand ad column -- inferred from the method name).
#
# Every child div whose class contains 'EC_idea' is parsed with parse_ad.
#
# Returns an Array of the Hashes produced by parse_ad, each merged with a
# :rank of (index + 1) offset by 10 for every page before @pagenumber.
def ads_right
  right_xpath = "//div[@id='ec_im_container']/div[contains(@class, 'EC_idea')]"
  candidates = @page.search(right_xpath)
  candidates.each_with_index.map do |node, i|
    ad = parse_ad(node)
    ad.merge(:rank => (i + 1) + (@pagenumber - 1) * 10)
  end
end
ads_top()
click to toggle source
# File lib/query/result/baidu.rb, line 21
# Collects the entries above the organic results in the left-hand column
# (presumably the top ad slots -- inferred from the method name).
#
# Picks the children of div#content_left that are neither result blocks nor
# one of the excluded widgets (leftBlock, <br>, rs_top_new, super_se_tip, rs)
# and whose position() is 7 or less, then runs each through parse_ad.
#
# Returns an Array of the Hashes produced by parse_ad, each merged with a
# :rank of (index + 1) offset by 10 for every page before @pagenumber.
def ads_top
  top_xpath = "//div[@id='content_left']/*[not(contains(@class, 'result') or contains(@class, 'leftBlock') or name()='br' or @id='rs_top_new' or @id='super_se_tip' or @class='rs') and position()<=7]"
  candidates = @page.search(top_xpath)
  candidates.each_with_index.map do |node, i|
    ad = parse_ad(node)
    ad.merge(:rank => (i + 1) + (@pagenumber - 1) * 10)
  end
end
count()
click to toggle source
# File lib/query/result/baidu.rb, line 39
# Returns the number shown in the page's 'nums' element.
#
# Looks for a div or span with class 'nums', takes the first match, strips
# every non-digit character from its content and converts the remainder to
# an Integer.
#
# The value is memoized in @count. The page is only searched while @count is
# still unset, so repeated calls do not re-run the XPath queries.
#
# Returns an Integer, or nil when the page has no 'nums' element.
def count
  @count ||= begin
    nodes = @page.search("//div[@class='nums']") + @page.search("//span[@class='nums']")
    first_node = nodes.first
    first_node.content.gsub(/\D/, '').to_i unless first_node.nil?
  end
end
has_result?()
click to toggle source
# File lib/query/result/baidu.rb, line 49
# Tells whether the page carries search results.
#
# The page is treated as empty only when it contains an <a> whose text is
# "提交网址" and whose href includes 'sitesubmit'. Presumably Baidu shows that
# link when nothing was found -- confirm against a live page.
#
# An anchor without an href attribute is treated as not matching, instead of
# raising NoMethodError on nil.
#
# Returns true or false.
def has_result?
  submit = @page.search('//a[text()="提交网址"]').first
  return true if submit.nil?

  !submit['href'].to_s.include?('sitesubmit')
end
html()
click to toggle source
# File lib/query/result/baidu.rb, line 6
# Serializes the wrapped page.
#
# Returns whatever @page.to_html yields.
def html
  document = @page
  document.to_html
end
next_url()
click to toggle source
# File lib/query/result/baidu.rb, line 55
# Finds the link to the following result page.
#
# Searches for <a> elements whose text is exactly "下一页>".
#
# Returns the href of the first such link, or false when there is none.
def next_url
  links = @page.search("//a[text()='下一页>']")
  if links.empty?
    false
  else
    links.first['href']
  end
end
relatives()
click to toggle source
# File lib/query/result/baidu.rb, line 10
# Lists the text of every link inside the div#rs table (presumably the
# "related searches" block -- inferred from the method name).
#
# Returns an Array with the text of each matching <a>, in document order.
def relatives
  links = @page.search("//div[@id='rs']/table/tr/th/a")
  links.map(&:text)
end
seo_ranks()
click to toggle source
# File lib/query/result/baidu.rb, line 14
# Parses the organic result blocks of the page.
#
# Every child of div#content_left whose class contains 'result' is handed to
# parse_seo.
#
# The list is cached in @ranks. The original guarded on @ranks but never
# assigned it, so the page was re-parsed on every call; the computed list is
# now stored so the guard takes effect.
#
# Returns an Array of the Hashes produced by parse_seo, each merged with a
# :rank of (index + 1) offset by 10 for every page before @pagenumber.
def seo_ranks
  return @ranks unless @ranks.nil?

  results = @page.search("//div[@id='content_left']/*[contains(@class, 'result')]")
  @ranks = results.each_with_index.map do |div, index|
    parse_seo(div).merge(:rank => (index + 1) + (@pagenumber - 1) * 10)
  end
end
Private Instance Methods
parse_ad(div)
click to toggle source
# File lib/query/result/baidu.rb, line 62
# Extracts title, link and host from one ad node.
#
# div - a node responding to #xpath (a Nokogiri element in practice --
#       TODO confirm against callers).
#
# The title is the first hit among the XPaths div[1]/h3/a,
# tbody/tr[2]/td/a[1] and a[1]. The display URL is the first hit among
# div[3]/span, tbody/tr[2]/td/a[2] and a[3]/font[last()], and falls back to
# 'www.baidu.com' when none matches.
#
# Returns a Hash with :text, :href and :host. When extraction fails (for
# example no title node was found) a warning is printed and {} is returned.
# Only StandardError is rescued, so Interrupt, SystemExit and similar
# non-standard exceptions propagate to the caller.
def parse_ad(div)
  # @todo should be : (this note is unfinished in the original source)
  title = %w(div[1]/h3/a tbody/tr[2]/td/a[1] a[1]).inject(nil) do |found, xpath|
    found || div.xpath(xpath).first
  end
  url = %w(div[3]/span tbody/tr[2]/td/a[2] a[3]/font[last()]).inject(nil) do |found, xpath|
    found || div.xpath(xpath).first
  end
  url = url.nil? ? 'www.baidu.com' : url.text
  url = "http://" + url
  begin
    # NOTE(review): URI.encode is obsolete and absent from Ruby 3.0+ -- confirm
    # the target runtime. Where it is missing, the NoMethodError is rescued
    # below and every call returns {}.
    {
      :text => title.text.strip,
      :href => title['href'].to_s.strip,
      :host => Addressable::URI.parse(URI.encode(url)).host
    }
  rescue StandardError => e
    warn "Error in parse_ad method : " + e.message
    {}
  end
end
parse_seo(div)
click to toggle source
# File lib/query/result/baidu.rb, line 81
# Extracts title, link, host and the "result-op" flag from one result node.
#
# div - a node responding to #xpath, #search and #[] (a Nokogiri element in
#       practice -- TODO confirm against callers).
#
# The title is the first hit among several relative XPaths (div[1]/h3/a,
# h3/a, ...). The display URL is the first hit among the known show-url
# spans/divs. A date of the form YYYY-M-D is removed from its text, and
# 'www.baidu.com' is used when no show-url element is found.
#
# Returns a Hash with :is_vr (true when the node's class contains
# "result-op"), :text, :href and :host. When extraction fails a warning is
# printed and {} is returned. Only StandardError is rescued, so Interrupt,
# SystemExit and similar non-standard exceptions propagate to the caller.
def parse_seo(div)
  title = %w(div[1]/h3/a h3/a div/div[1]/div[1]/div tr[2]/td/table/tr/td/h3/a).inject(nil) do |found, xpath|
    found || div.xpath(xpath).first
  end
  url = %w(span[@class="g"] span[@class="c-showurl"]/span[@class="c-showurl"] span[@class="c-showurl"] span[@class="op_wiseapp_showurl"] div[@class="op_zhidao_showurl"]).inject(nil) do |found, xpath|
    found || div.search(xpath).first
  end
  url = url.nil? ? 'www.baidu.com' : url.text.sub(/\d{4}-\d{1,2}-\d{1,2}/, '').strip
  url = "http://" + url
  # url = Query::get_redirect_url(title['href'].to_s.strip) if url.include?('elong.com') && title['href']
  # url = 'http://www.baidu.com' if url.empty?
  begin
    # NOTE(review): URI.encode is obsolete and absent from Ruby 3.0+ -- confirm
    # the target runtime. Where it is missing, the NoMethodError is rescued
    # below and every call returns {}.
    {
      :is_vr => div['class'].include?("result-op"),
      :text => title.text.strip,
      :href => title['href'].to_s.strip,
      :host => Addressable::URI.parse(URI.encode(url)).host
    }
  rescue StandardError => e
    warn "Error in parse_seo method : " + e.message
    {}
  end
end