class SuperCrawler::Render

Renders crawl results and crawl progress messages (start notice, status, summary, errors) to the console.

Public Class Methods

console(crawl_results, max_pages) click to toggle source

Render the sitemap in the console. For each crawled link, show its internal links and assets. The number of pages displayed is limited, because some sites have more than 1,000 pages.

# File lib/super_crawler/render.rb, line 33
# Render the crawl results as a sitemap in the console.
#
# crawl_results - array of hashes; each hash is read through the keys :url,
#                 :links and :assets (itself a hash with :images,
#                 :stylesheets and :scripts collections)
# max_pages     - maximum number of pages to display
#
# For every displayed page, prints its URL followed by its links, images,
# stylesheets and scripts ("None" when a collection is empty).
def self.console(crawl_results, max_pages)
  # Array#first(0) yields no pages, whereas the range 0..(max_pages - 1)
  # turns into 0..-1 for a zero limit and would select *every* page.
  # Negative limits are clamped to zero for the same reason.
  page_limit = [max_pages, 0].max

  draw_line
  puts "Showing first #{max_pages} crawled pages and their contents:\n\n"
  crawl_results.first(page_limit).each_with_index do |result, index|
    puts "[#{index + 1}] Content of #{result[:url]}\n"

    assets = result[:assets]
    sections = [
      ['Internal links', result[:links]],
      ['Internal images', assets[:images]],
      ['Internal stylesheets', assets[:stylesheets]],
      ['Internal scripts', assets[:scripts]]
    ]
    sections.each do |label, entries|
      puts "     + #{label}: #{'None' if entries.empty?}"
      entries.each { |entry| puts "            - #{entry}" }
    end
    puts ""
  end
  draw_line
end
crawling_start_notice(start_url, threads) click to toggle source

Display a notice when starting a site crawl

# File lib/super_crawler/render.rb, line 18
# Print the banner shown when a site crawl begins: the start URL, the number
# of threads in use and the list of crawling rules, framed by two separator
# lines.
#
# start_url - URL the crawl starts from (also shown in rule 1)
# threads   - number of threads used for crawling
def self.crawling_start_notice(start_url, threads)
  draw_line
  # Kernel#puts prints each element of an array on its own line.
  puts [
    "Start crawling #{start_url} using #{threads} threads. Crawling rules:",
    "1. Consider only links starting with #{start_url}",
    "2. Remove the fragment part from the links (#...)",
    "3. Keep paths with different parameters (?...)",
    "4. Assets can be internal or external to the site"
  ]
  draw_line
end
crawling_summary_notice(total_time, threads_count, links_count) click to toggle source

Display the final crawling summary once the site crawl is complete.

# File lib/super_crawler/render.rb, line 68
# Print the closing summary of a crawl: a blank line, then a framed block
# with the number of crawled links, the elapsed time (shown as a float) and
# the thread count, followed by usage hints.
#
# total_time    - elapsed crawl time in seconds (converted with to_f)
# threads_count - number of threads that were used
# links_count   - number of links crawled
def self.crawling_summary_notice(total_time, threads_count, links_count)
  puts
  draw_line
  elapsed_seconds = total_time.to_f
  puts "\e[33m[SUCCESS]\e[0m Crawled #{links_count} links in #{elapsed_seconds} seconds using #{threads_count} threads.",
       "Use .crawl_results to access the crawl results as an array of hashes.",
       "Use .render to see the crawl_results as a sitemap."
  draw_line
end
error(message) click to toggle source

Display error message in the console.

# File lib/super_crawler/render.rb, line 11
# Print an error message to the console, prefixed with a red "[ERROR]" tag.
#
# message - text to display after the tag
def self.error(message)
  red_tag = "\e[31m[ERROR]\e[0m"
  puts "#{red_tag} #{message}"
end
log_status(url, crawl_results_length, links_length) click to toggle source

Log current search status (crawled links / total links)

# File lib/super_crawler/render.rb, line 58
# Log the current crawl progress (crawled links / total links) on a single
# console line that is rewritten on every call.
#
# url                  - URL appended to the status text
# crawl_results_length - number of pages crawled so far
# links_length         - total number of links
#
# The status text is capped at 50 characters: longer text is cut to its
# first 47 characters followed by "...".
def self.log_status(url, crawl_results_length, links_length)
  text = "Crawled #{crawl_results_length}/#{links_length}: #{url}"
  text = "#{text[0..46]}..." if text.length > 50

  # Go back to the start of the line and blank it out, so a shorter status
  # does not leave remnants of the previous, longer one.
  print "\r#{' ' * 100}\r"
  print text
  # Flush the stream that print actually writes to. Flushing the STDOUT
  # constant would miss the output whenever $stdout has been reassigned.
  $stdout.flush
end

Private Class Methods

draw_line() click to toggle source

Draw a line (because readability is also important!!)

# File lib/super_crawler/render.rb, line 82
# Print a horizontal separator made of 80 dashes, used to frame console
# output for readability.
def self.draw_line
  separator = '-' * 80
  puts separator
end