class SuperCrawler::Render
Render
crawl results and processing.
Public Class Methods
console(crawl_results, max_pages)
click to toggle source
Render
sitemap in the console. Show, for each link, its internal links and assets. We limit the number of pages displayed, because some sites have more than 1,000 pages.
# File lib/super_crawler/render.rb, line 33 def self.console crawl_results, max_pages self.draw_line puts "Showing first #{max_pages} crawled pages and their contents:\n\n" crawl_results[0..(max_pages-1)].each_with_index do |result, index| puts "[#{index+1}] Content of #{result[:url]}\n" puts " + Internal links: #{'None' if result[:links].empty?}" result[:links].each { |link| puts " - #{link}" } puts " + Internal images: #{'None' if result[:assets][:images].empty?}" result[:assets][:images].each { |link| puts " - #{link}" } puts " + Internal stylesheets: #{'None' if result[:assets][:stylesheets].empty?}" result[:assets][:stylesheets].each { |link| puts " - #{link}" } puts " + Internal scripts: #{'None' if result[:assets][:scripts].empty?}" result[:assets][:scripts].each { |link| puts " - #{link}" } puts "" end self.draw_line end
crawling_start_notice(start_url, threads)
click to toggle source
Display a notice when starting a site crawl
# File lib/super_crawler/render.rb, line 18
# Display a notice when starting a site crawl: the start URL, the number of
# threads, and the four crawling rules, framed by two horizontal rules.
#
# start_url - URL the crawl starts from; interpolated into the notice.
# threads   - number of threads; interpolated into the notice.
def self.crawling_start_notice(start_url, threads)
  # Implicit receiver: draw_line is documented as a private class method,
  # and `self.draw_line` on a private method raises before Ruby 2.7.
  draw_line
  puts "Start crawling #{start_url} using #{threads} threads. Crawling rules:"
  puts "1. Consider only links starting with #{start_url}"
  puts "2. Remove the fragment part from the links (#...)"
  puts "3. Keep paths with different parameters (?...)"
  puts "4. Assets can be internal or external to the site"
  draw_line
end
crawling_summary_notice(total_time, threads_count, links_count)
click to toggle source
Display the final crawling summary after the site crawl completes
# File lib/super_crawler/render.rb, line 68
# Display the final crawling summary: how many links were crawled, how long
# it took, how many threads were used, and how to access the results.
#
# total_time    - elapsed time; converted with `to_f` before printing.
# threads_count - number of threads; interpolated into the summary.
# links_count   - number of crawled links; interpolated into the summary.
def self.crawling_summary_notice(total_time, threads_count, links_count)
  # NOTE(review): the collapsed listing reads `puts self.draw_line`; it is
  # reconstructed here as a bare `puts` followed by the rule. Presumably the
  # newline ends the in-place status line that log_status prints without a
  # trailing newline - confirm against the original file.
  puts
  # Implicit receiver: draw_line is documented as a private class method,
  # and `self.draw_line` on a private method raises before Ruby 2.7.
  draw_line
  # Interpolation already calls to_s, so the explicit `.to_s` is dropped.
  puts "\e[33m[SUCCESS]\e[0m Crawled #{links_count} links in #{total_time.to_f} seconds using #{threads_count} threads."
  puts "Use .crawl_results to access the crawl results as an array of hashes."
  puts "Use .render to see the crawl_results as a sitemap."
  draw_line
end
error(message)
click to toggle source
Display error message in the console.
# File lib/super_crawler/render.rb, line 11
# Print an error message to the console, prefixed with a red "[ERROR]" tag.
#
# message - text shown after the tag; interpolated into the output line.
def self.error(message)
  # ANSI escape 31 switches to red, escape 0 resets the colour.
  red_tag = "\e[31m[ERROR]\e[0m"
  puts "#{red_tag} #{message}"
end
log_status(url, crawl_results_length, links_length)
click to toggle source
Log current search status (crawled links / total links)
# File lib/super_crawler/render.rb, line 58
# Log the current crawl status (crawled links / total links) on a single
# console line that is overwritten on each call.
#
# url                  - URL shown at the end of the status text.
# crawl_results_length - number of crawled links (left of the slash).
# links_length         - total number of links (right of the slash).
def self.log_status(url, crawl_results_length, links_length)
  text = "Crawled #{crawl_results_length}/#{links_length}: #{url}"

  # Overwrite the previous status with 100 spaces, then return the cursor
  # to the start of the line.
  print "\r#{' ' * 100}\r"

  # Keep the status within 50 characters: 47 of text plus "...".
  shown = text.length <= 50 ? text : "#{text[0..46]}..."
  print shown

  # Flush the stream `print` actually wrote to. The STDOUT constant is a
  # different object once $stdout has been reassigned.
  $stdout.flush
end
Private Class Methods
draw_line()
click to toggle source
Draw a line (because readability is also important!!)
# File lib/super_crawler/render.rb, line 82
# Draw a horizontal rule of 80 dashes on the console; the other notices use
# it to frame their output.
def self.draw_line
  # The string needs no "#{...}" wrapper; puts prints it as is.
  puts '-' * 80
end