class Scraper
Attributes
name[RW]
source[RW]
Public Class Methods
new(source, name)
click to toggle source
# File lib/ext/scraper.rb, line 8
# Builds a scraper for one page.
#
# source - the location of the page; stored as-is in @source.
# name   - the project name; stored in @name and also used as the name of
#          the output directory.
#
# The output directory (@AppRoot) is resolved against the working directory
# at construction time, so a later Dir.chdir does not move it.
def initialize(source, name)
  @source, @name = source, name
  @AppRoot = File.join(Dir.pwd, name)
end
Public Instance Methods
open(*args)
click to toggle source
Calls superclass method
# File lib/ext/scraper.rb, line 14
# Delegates to the inherited +open+, appending a fixed set of options to
# whatever arguments the caller supplied. A block given by the caller is
# forwarded to the superclass implementation implicitly.
#
# args - positional arguments handed through unchanged.
#
# Returns whatever the superclass +open+ returns.
#
# NOTE(review): ssl_verify_mode is VERIFY_NONE, i.e. TLS certificates are not
# verified for anything fetched through this method - confirm this is
# intended. The allow_redirections option is presumably consumed by a
# redirect-following extension to open-uri loaded elsewhere; verify.
def open(*args)
  forced_options = {
    allow_redirections: :safe,
    ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE
  }
  super(*args, **forced_options)
end
scrap()
click to toggle source
# File lib/ext/scraper.rb, line 18
# Downloads the page at @source together with the images, linked resources
# and scripts it references, and writes a local copy under @AppRoot.
#
# Layout produced:
#   <AppRoot>/index.html   - the page, with asset references rewritten
#   <AppRoot>/<url path>   - images, stored under the path of their URL
#   <AppRoot>/css/<n>.css  - one file per <link href>, numbered in page order
#   <AppRoot>/js/<n><name> - one file per <script src>, numbered in page order
#
# Every <link href> is saved with a .css name, whatever it points to.
# A link that cannot be downloaded is reported on stderr and leaves an empty
# file behind; a failing image or script download raises and aborts the run.
#
# Returns the result of writing index.html.
def scrap
  scraproot = @AppRoot
  [scraproot, "#{scraproot}/img", "#{scraproot}/js", "#{scraproot}/css"].each do |dir|
    system 'mkdir', '-p', dir
  end

  # Remote reads go through #open (which adds the HTTP options); local files
  # are written with File.open so those options never reach the filesystem.
  fetch = ->(ref) { open(URI.join(@source, ref).to_s).read }

  page = Nokogiri::HTML(open(@source))

  page.xpath('//img/@data-src', '//img/@src').each do |img|
    # Use the path of the resolved URL so relative and absolute references
    # both map to a well-formed location below the output directory.
    asset_name = URI.join(@source, img.value).path
    # Nothing sensible to name the file when the URL has no file component.
    next if asset_name.nil? || asset_name.empty? || asset_name.end_with?('/')

    filename = File.join(scraproot, asset_name)
    system 'mkdir', '-p', File.dirname(filename)
    File.open(filename, 'wb') do |file|
      puts "Writing #{filename}"
      file << fetch.call(img.value)
    end
    img.content = asset_name
  end

  page.xpath('//link/@href').each_with_index do |link, n|
    asset_name = "/css/#{n}.css"
    filename = scraproot + asset_name
    File.open(filename, 'wb') do |file|
      puts "Writing #{filename}"
      begin
        # Fetched once; a failure is best-effort and must not stop the run.
        file << fetch.call(link.value)
      rescue StandardError => e
        warn "Could not fetch #{link.value}: #{e.message}"
      end
    end
    link.content = asset_name
  end

  page.xpath('//script/@src').each_with_index do |script, n|
    asset_name = "/js/#{n}#{File.basename(script.value)}"
    filename = scraproot + asset_name
    File.open(filename, 'wb') do |file|
      puts "Writing #{filename}"
      file << fetch.call(script.value)
    end
    script.content = asset_name
  end

  index_path = File.join(scraproot, 'index.html')
  File.open(index_path, 'wb') do |file|
    puts "Writing #{index_path}"
    file.write(page)
  end
end