class DomainsScanner::Crawlers::Google
Public Instance Methods
host()
click to toggle source
# File lib/domains_scanner/crawlers/google.rb, line 4
# Base URL of the search engine handled by this crawler.
#
# @return [String] always "https://google.com"
def host
  "https://google.com"
end
keyword_field_name()
click to toggle source
# File lib/domains_scanner/crawlers/google.rb, line 8
# Name of the field that carries the search keyword
# (presumably the query input on the search form; confirm against the caller).
#
# @return [String] always "q"
def keyword_field_name
  "q"
end
next_page_link_selector()
click to toggle source
# File lib/domains_scanner/crawlers/google.rb, line 27
# CSS selector for the link leading to the next page of results.
#
# Inside div#foot it matches the <a> that is a direct child of the <td>
# immediately following the element with class "cur".
#
# @return [String] always "div#foot .cur+td>a"
def next_page_link_selector
  "div#foot .cur+td>a"
end
parse_results(doc)
click to toggle source
Returns an array of result hashes: [{title: "xxx", url: "xxx"}, …]
# File lib/domains_scanner/crawlers/google.rb, line 13
# Collects the result links found in a parsed results page.
#
# @param doc [#search] parsed page; `search(css)` must return an enumerable
#   of nodes that respond to `text` and `attributes`
# @return [Array<Hash>] one hash per matched link, shaped
#   `{ title: "xxx", url: "xxx" }`; `url` is nil when the link has no href
def parse_results(doc)
  doc.search(".g h3.r a").map do |link|
    href_attr = link.attributes["href"]
    href = href_attr && href_attr.value

    # Only the first "/url?q=" is removed; any trailing parameters are kept, e.g.
    # https://bbs.abc.net/thread-144889-1-1.html&sa=U&ved=0ahUKEwjpmNT0ltnXAhXMxLwKHQJIAmE4ChAWCBQwAA&usg=AOvVaw31kkGPP7ZVlFGlAby9OkzE
    url = href && href.sub("/url?q=", "")

    { title: link.text, url: url }
  end
end