class Query::Result::BaiduMobile

Public Instance Methods

ads_bottom() click to toggle source
# File lib/query/result/baidu_mobile.rb, line 20
# Ads found in the non-"result" <div> siblings that FOLLOW a "result"
# element. The XPath selects the parent of each ad link whose href does not
# contain the Baidu guarantee URL; every such node is run through parse_ad
# and tagged with a page-aware :rank. The list is memoized in @ads_bottom.
#
# @return [Array<Hash>] parse_ad hashes, each extended with :rank
def ads_bottom
  return @ads_bottom if @ads_bottom

  ad_xpath = "//*[@class='result']/following-sibling::div[not (contains(@class,'result'))]/div/div/a[not (contains(@href,'http://baozhang.baidu.com/guarantee'))]/.."
  ad_nodes = @page.search(ad_xpath)
  @ads_bottom = ad_nodes.each_with_index.map do |node, position|
    # Ranks are 1-based and offset by ten per preceding page.
    rank = (position + 1) + (@pagenumber - 1) * 10
    parse_ad(node).merge(:rank => rank)
  end
end
ads_right() click to toggle source
# File lib/query/result/baidu_mobile.rb, line 36
# This result type reports no right-hand ads; always returns a new empty
# array.
#
# @return [Array] always empty
def ads_right
  Array.new
end
ads_top() click to toggle source
# File lib/query/result/baidu_mobile.rb, line 13
# Ads found in the non-"result" <div> siblings that PRECEDE a "result"
# element. The XPath selects the parent of each ad link whose href does not
# contain the Baidu guarantee URL; every such node is run through parse_ad
# and tagged with a page-aware :rank. The list is memoized in @ads_top.
#
# @return [Array<Hash>] parse_ad hashes, each extended with :rank
def ads_top
  return @ads_top if @ads_top

  ad_xpath = "//*[@class='result']/preceding-sibling::div[not (contains(@class,'result'))]/div/div/a[not (contains(@href,'http://baozhang.baidu.com/guarantee'))]/.."
  ad_nodes = @page.search(ad_xpath)
  @ads_top = ad_nodes.each_with_index.map do |node, position|
    # Ranks are 1-based and offset by ten per preceding page.
    rank = (position + 1) + (@pagenumber - 1) * 10
    parse_ad(node).merge(:rank => rank)
  end
end
count() click to toggle source
# File lib/query/result/baidu_mobile.rb, line 46
# Placeholder with no implementation; it always returns nil.
#
# @return [nil]
def count
  nil
end
html() click to toggle source
# File lib/query/result/baidu_mobile.rb, line 32
# Serializes the wrapped page by delegating to its #to_html.
#
# @return whatever @page.to_html returns
def html
  document = @page
  document.to_html
end
next_url() click to toggle source
# File lib/query/result/baidu_mobile.rb, line 40
# URL of the next result page.
#
# Uses the href of the first link inside <div id="pagenav"> when one exists;
# otherwise builds a "/s?" URL from the base URI's query string and a "pn"
# offset of ten results per page.
#
# @return [String] href of the pagination link, or the constructed path
def next_url
  nav_link = @page.search("//div[@id='pagenav']/a").first
  return nav_link['href'] unless nav_link.nil?

  "/s?#{@baseuri.query}&pn=#{@pagenumber*10}"
end
seo_ranks() click to toggle source
# File lib/query/result/baidu_mobile.rb, line 6
# Organic results parsed from the <div id="results"> container.
#
# Every "div.result" inside the container is run through parse_seo and
# tagged with a page-aware :rank. The list is memoized in @seo_ranks, and the
# container lookup only happens on the first call.
#
# @return [Array<Hash>] parse_seo hashes, each extended with :rank; an empty
#   array when the page has no results container
def seo_ranks
  @seo_ranks ||= begin
    results = @page.at("//div[@id='results']")
    if results.nil?
      # No results container on this page: report no results rather than
      # raising NoMethodError on nil.
      []
    else
      results.css("div.result").map.with_index do |seo_div, index|
        # Ranks are 1-based and offset by ten per preceding page.
        parse_seo(seo_div).merge(:rank => (index + 1) + (@pagenumber - 1) * 10)
      end
    end
  end
end

Private Instance Methods

find_host(node) click to toggle source
# File lib/query/result/baidu_mobile.rb, line 108
# Guesses a host name from the text inside +node+.
#
# Searches for the first non-<style> descendant whose text contains ".cn" or
# "com" and returns the first whitespace-separated token of that element's
# text. Falls back to 'm.baidu.com' when nothing matches.
#
# @param node an element responding to #search (XPath)
# @return [String] the first token of the matching text, or 'm.baidu.com'
def find_host(node)
  candidates = node.search(".//*[name()!='style' and (contains(text(),'.cn') or contains(text(),'com'))]")
  match = candidates[0]
  return 'm.baidu.com' if match.nil?

  match.text.split[0]
end
parse_ad(ad_div) click to toggle source
# File lib/query/result/baidu_mobile.rb, line 51
# Extracts title text, link target and host from a single ad node.
#
# The destination URL is the href of the first <link> child when present.
# Otherwise it is "http://" plus the text of the first <span> containing
# ".com", or "http://m.baidu.com" when there is no such span.
#
# @param ad_div an element responding to #search (the ad container)
# @return [Hash] with :text (title with all whitespace removed), :href (the
#   href of the first <a>) and :host (host of the destination URL); an empty
#   hash when parsing fails with a StandardError (a warning is printed)
def parse_ad(ad_div)
  title_link = ad_div.search('a')[0]
  links = ad_div.search('link')
  if links.empty?
    host_span = ad_div.search(".//span[contains(text(),'.com')]")[0]
    url = host_span.nil? ? "http://m.baidu.com" : "http://#{host_span.text.strip}"
    title = title_link.text
  else
    url = links[0]['href']
    # Only the anchor's own text nodes and <em> children make up the title.
    title = title_link.xpath("./text() | ./em").text
  end
  {
    :text => title.gsub(/\n|\s/, ''),
    :href => title_link['href'],
    # URI.encode was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape performs
    # the same escaping and is also available on older Rubies.
    :host => Addressable::URI.parse(URI::DEFAULT_PARSER.escape(url)).host
  }
rescue StandardError => e
  # StandardError only: interrupts and exit requests must keep propagating.
  warn "Error in parse_ad method : " + e.message
  {}
end
parse_seo(seo_div) click to toggle source
# File lib/query/result/baidu_mobile.rb, line 74
# Extracts title, link, host and a "vertical result" flag from one result
# node.
#
# The host is resolved differently depending on the node:
# * class == "result": text of the first element with class "site", else
#   find_host; :is_vr is false.
# * srcid == "map": fixed host 'map.baidu.com'.
# * "tpl" and "data-log" attributes present: host of the "mu" URL stored in
#   the data-log JSON (single quotes are converted to double quotes before
#   parsing), else find_host when "mu" is missing or empty.
# * anything else: find_host.
# Every branch but the first sets :is_vr to true.
#
# @param seo_div an element responding to #search and #[] (attribute access)
# @return [Hash] with :is_vr, :text (whitespace removed, at most 31 chars),
#   :href (prefixed with "http://m.baidu.com" unless it already contains that
#   host) and :host; an empty hash when parsing fails with a StandardError
#   (a warning is printed)
def parse_seo(seo_div)
  title_link = seo_div.search('a')[0]
  href = title_link['href']
  # Dots are escaped so only a literal "m.baidu.com" counts as absolute.
  href = "http://m.baidu.com#{href}" unless href[/m\.baidu\.com/]

  if seo_div['class'] == 'result'
    is_vr = false
    site = seo_div.search(".//*[@class='site']")[0]
    host = site.nil? ? find_host(seo_div) : site.text.split[0]
  elsif seo_div['srcid'] == 'map'
    is_vr = true
    host = 'map.baidu.com'
  elsif seo_div['tpl'] && seo_div['data-log']
    is_vr = true
    url = JSON.parse(seo_div['data-log'].gsub("'", '"'))['mu']
    if url.nil? || url == ''
      # No usable target URL in the log payload: guess from the node text.
      host = find_host(seo_div)
    else
      # URI.encode was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape
      # performs the same escaping and is also available on older Rubies.
      host = Addressable::URI.parse(URI::DEFAULT_PARSER.escape(url)).host
    end
  else
    is_vr = true
    host = find_host(seo_div)
  end

  {
    :is_vr => is_vr,
    :text  => title_link.text.gsub(/\n|\s/, '')[0..30],
    :href  => href,
    :host  => host
  }
rescue StandardError => e
  # StandardError only: interrupts and exit requests must keep propagating.
  warn "Error in parse_seo method : " + e.message
  {}
end
redirect(url,limit = 10) click to toggle source
# File lib/query/result/baidu_mobile.rb, line 113
# Follows HTTP redirects starting at +url+ and reports the host that finally
# answers with a success status.
#
# @param url [String, URI] address to request
# @param limit [Integer] maximum number of requests still allowed
# @return [String] host of the URL that returned a 2xx response, or
#   "m.baidu.com" for any other non-redirect response or a redirect without
#   a Location header
# @raise [ArgumentError] when the redirect budget is exhausted
def redirect(url, limit = 10)
  raise ArgumentError, 'Too many HTTP redirects' if limit <= 0

  uri = URI(url)
  response = Net::HTTP.get_response(uri)
  case response
  when Net::HTTPSuccess
    uri.host
  when Net::HTTPRedirection
    location = response['location']
    return "m.baidu.com" if location.nil? || location.empty?

    # Location may be relative; resolve it against the URL just requested.
    redirect(uri.merge(location).to_s, limit - 1)
  else
    "m.baidu.com"
  end
end