class DomainsScanner::Crawlers::Baidu

Public Instance Methods

host() click to toggle source
# File lib/domains_scanner/crawlers/baidu.rb, line 4
# Base URL of the Baidu search site.
#
# @return [String] always "https://www.baidu.com"
def host
  'https://www.baidu.com'
end
keyword_field_name() click to toggle source
# File lib/domains_scanner/crawlers/baidu.rb, line 8
# Name of the field that carries the search keyword.
#
# @return [String] always "wd"
def keyword_field_name
  'wd'
end
parse_results(doc) click to toggle source
Returns an array of result hashes: [{title: "xxx", url: "xxx"}, …]
# File lib/domains_scanner/crawlers/baidu.rb, line 13
# Extracts the search hits from a parsed Baidu results page.
#
# Baidu encrypts the real target link, so the URL is rebuilt from the visible
# "show url" text instead (e.g. "bbs.abc.net/for...php?..."), which is enough
# here even though it may be truncated.
#
# @param doc [#search] parsed results page; only `search` is called on it
#   (presumably a Nokogiri document -- confirm against the caller)
# @return [Array<Hash>] one `{ title: String, url: String or nil }` per
#   ".result" element; `url` is nil when the result carries no show url
def parse_results(doc)
  doc.search(".result").map do |item|
    title = item.search("h3.t > a").text
    # Test the extracted text, not the object returned by `search`: with
    # Nokogiri that is a node set, which is truthy even when nothing matched.
    shown = item.search("div:last-child > a.c-showurl").text.strip

    url =
      unless shown.empty?
        # The show url usually omits the scheme; default to plain http.
        absolute = shown.start_with?("http") ? shown : "http://#{shown}"
        # URI.encode was removed in Ruby 3.0; the parser's escape does the same job.
        URI::DEFAULT_PARSER.escape(absolute)
      end

    { title: title, url: url }
  end
end