class Scraper

Attributes

name[RW]
source[RW]

Public Class Methods

new(source, name)
# File lib/ext/scraper.rb, line 8
# Sets up a scraper for a single page.
#
# source - where the page lives; it is handed to +open+ and used as the base
#          for URI.join when assets are fetched (presumably a URL string —
#          confirm against callers).
# name   - name of the output directory, created under the current working
#          directory.
def initialize(source, name)
  @source, @name = source, name
  # Output root: +name+ joined onto the directory the process is running in.
  @AppRoot = File.join(Dir.pwd, name)
end

Public Instance Methods

open(*args)
Calls superclass method
# File lib/ext/scraper.rb, line 14
# Delegates to the superclass +open+, appending two fixed options to whatever
# arguments the caller supplied: allow_redirections: :safe and
# ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE. Any block given is passed
# through by +super+.
#
# NOTE(review): VERIFY_NONE switches off TLS certificate verification for
# every request made through this method — confirm this is intentional.
# NOTE(review): the options are added to every call, including calls that
# open local paths — confirm the superclass +open+ tolerates that.
def open(*args)
  fixed_options = {
    allow_redirections: :safe,
    ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE
  }
  super(*args, **fixed_options)
end
scrap()
# File lib/ext/scraper.rb, line 18
# Mirrors the page at @source into the @AppRoot directory.
#
# Steps:
# 1. Creates @AppRoot with img/, js/ and css/ subdirectories.
# 2. Parses the page with Nokogiri.
# 3. Downloads every asset referenced by <img src>/<img data-src>,
#    <link href> and <script src>, resolving each reference against @source.
# 4. Rewrites <link> and <script> references to the numbered local copies
#    (/css/N.css, /js/N<basename>). Image references are left as they are
#    because images are stored under the same relative path they had.
# 5. Writes the rewritten document to index.html.
#
# Local files are written with File.open instead of this class's #open,
# because #open appends HTTP-only options to every call.
#
# Error handling: a failed <link> download is reported via +warn+ and leaves
# an empty local file (best effort). A failed page, image or script download
# raises out of this method.
#
# NOTE(review): every <link href> is stored as CSS regardless of its +rel+
# attribute (icons, canonical links, ...) — confirm whether that is intended.
def scrap
  # Required here because this method cannot rely on a file-level require.
  require 'fileutils'

  scraproot = @AppRoot
  %w[img js css].each { |dir| FileUtils.mkdir_p(File.join(scraproot, dir)) }
  root_prefix = File.expand_path(scraproot) + File::SEPARATOR

  # Downloads +ref+ (resolved against the page location) and returns its body.
  fetch = ->(ref) { open(URI.join(@source, ref).to_s).read }

  # Writes +data+ to the local file +path+ in binary mode.
  save = lambda do |path, data|
    puts "Writing #{path}"
    File.open(path, 'wb') { |file| file << data }
  end

  page = Nokogiri::HTML(open(@source))

  page.xpath('//img/@data-src', '//img/@src').each do |img|
    asset_name = img.value
    # File.join keeps relative references ("img/a.png") inside the app root;
    # plain string concatenation produced "<root>img/a.png".
    filename = File.join(scraproot, asset_name)

    # The page is untrusted input: refuse references such as "../../x" (or an
    # empty value) that would resolve to the root itself or outside of it.
    unless File.expand_path(filename).start_with?(root_prefix)
      warn "Skipping #{asset_name.inspect}: resolves outside #{scraproot}"
      next
    end

    # Fetch before touching the disk so a failed download leaves no empty file.
    data = fetch.call(asset_name)
    FileUtils.mkdir_p(File.dirname(filename))
    save.call(filename, data)
  end

  page.xpath('//link/@href').each_with_index do |link, index|
    asset_name = "/css/#{index}.css"
    # Download once; the previous code fetched every stylesheet twice.
    body =
      begin
        fetch.call(link.value)
      rescue StandardError => e
        warn "Could not download #{link.value}: #{e.message}"
        ''
      end
    save.call(scraproot + asset_name, body)
    link.content = asset_name
  end

  page.xpath('//script/@src').each_with_index do |script, index|
    asset_name = "/js/#{index}#{File.basename(script.value)}"
    data = fetch.call(script.value)
    save.call(scraproot + asset_name, data)
    script.content = asset_name
  end

  index_path = File.join(scraproot, 'index.html')
  puts "Writing #{index_path}"
  File.open(index_path, 'wb') { |file| file.write(page) }
end