class Crawler

Attributes

url[RW]

Public Class Methods

new(url, type)
# File lib/wpcrawler.rb, line 8
# Stores the crawl target on the new instance.
#
# url  - kept in @url; #scrape interpolates it as the host part of the
#        request URL (after "https://").
# type - kept in @type; #scrape interpolates it as the last path segment
#        of the "/wp-json/wp/v2/" endpoint.
def initialize(url, type)
  @url, @type = url, type
end

Public Instance Methods

scrape()
# File lib/wpcrawler.rb, line 13
# Requests pages 1..8 (100 items per page) of
# "https://<@url>/wp-json/wp/v2/<@type>/" and writes one pipe-separated row
# per returned item to "wpoutputfile.csv" in the current directory, after a
# header line. The file is truncated on every call.
#
# A spinner is shown while the work runs. Pages whose body cannot be turned
# into rows (invalid JSON, a non-array payload, an item missing "date" or
# "author") are skipped so the remaining pages are still attempted.
#
# If a request times out, the rows written so far are kept, the spinner is
# stopped and a hint is printed instead of raising. Other errors propagate.
def scrape
  spinner = TTY::Spinner.new("[:spinner]", format: :dots)
  spinner.auto_spin # Automatic animation with default interval

  # Block form guarantees the file is flushed and closed even when a request
  # raises part-way through the crawl.
  File.open("wpoutputfile.csv", "w+") do |output|
    output.puts "Date | Title | Author | Link | Status"

    (1..8).each do |page|
      response = HTTP.timeout(5).get("https://#{@url}/wp-json/wp/v2/#{@type}/?page=#{page}&per_page=100").to_s

      begin
        entries = JSON.parse(response)
        # A non-array payload carries no rows (presumably the error object
        # the API sends for a page past the end -- confirm against the API).
        next unless entries.is_a?(Array)

        entries.each do |entry|
          next unless entry.is_a?(Hash)

          output.puts "#{entry.fetch('date')}| #{entry.dig('title', 'rendered')}| " \
                      "#{entry.fetch('author')}| #{entry['link']}| #{entry['status']}"
        end
      rescue JSON::ParserError, KeyError, TypeError
        # Best effort: give up on this page only and move on to the next one.
      end
    end
  end

  spinner.stop('Done!') # Stop animation
rescue HTTP::TimeoutError
  spinner&.stop # do not leave the animation running after a failure
  puts "Connection ERROR - make sure your website is a wordpress site with an open api"
end