module ImageMetadataScraper

Constants

IMAGE_FILE_URL
SCRAPERS
VERSION

Public Class Methods

http_url(url) click to toggle source
# File lib/image_metadata_scraper.rb, line 44
# Normalizes a user-supplied address into an HTTP(S) URL.
#
# Surrounding whitespace is stripped. An address that does not start with a
# "scheme://" prefix gets "http://" prepended. An address that already starts
# with http:// or https:// (in any letter case) is returned with the scheme
# lowercased and the rest untouched.
#
# url - the raw address. Must respond to #blank?, which is not core Ruby
#       (presumably supplied by ActiveSupport - confirm against the file's
#       requires).
#
# Returns the normalized URL String, or nil when url is blank or starts with
# a scheme other than http/https.
def self.http_url(url)
  return if url.blank?

  url = url.strip
  # Match only a genuine leading URI scheme (RFC 3986: a letter followed by
  # letters, digits, "+", "-" or "."). A greedy ".+://" would run up to the
  # LAST "://" in the string and misread URLs that embed another URL in
  # their path or query string.
  scheme = url[%r{\A[a-z][a-z0-9+.-]*://}i]

  return "http://#{url}" if scheme.nil?

  # Schemes are case-insensitive, so compare and emit them in lowercase.
  normalized_scheme = scheme.downcase
  return unless %w[http:// https://].include?(normalized_scheme)

  "#{normalized_scheme}#{url[scheme.length..-1]}"
end
redirect_from(url) click to toggle source
# File lib/image_metadata_scraper.rb, line 35
# Resolves a single HTTP redirect hop for the given URL.
#
# Issues one GET request to the whitespace-stripped URL. When the response
# status is 301, 302, 303, 307 or 308 and carries a Location header, that
# target is returned as an absolute URL (a relative Location is resolved
# against the requested URL). In every other case the stripped URL itself
# is returned. Only one hop is followed.
#
# url - String URL to request.
#
# Returns a String URL.
# Raises whatever URI() and Net::HTTP.get_response raise for an invalid URL
# or a failed request; those errors are not rescued here.
def self.redirect_from(url)
  target = url.strip
  response = Net::HTTP.get_response(URI(target))

  case response.code
  when '301', '302', '303', '307', '308'
    # Read the header through #[]; HTTPResponse#header is obsolete.
    location = response['location']
    return target if location.nil? || location.strip.empty?

    begin
      # Location may be relative (e.g. "/new/path"); make it absolute.
      URI.join(target, location.strip).to_s
    rescue URI::Error
      # The header could not be parsed as a URI: hand it back untouched
      # rather than raising an error the caller never had to handle before.
      location
    end
  else
    target
  end
end
scrape(url) click to toggle source

Returns a hash of scraped image metadata that always contains:

- image_url: URL to the largest available image file
- thumbnail_url: URL to a small version of the image

and includes, if applicable:

- artist: the name of the artist (blogger)
- url: canonical URL to the image page (e.g. a DeviantArt post)

Returns nil if scraping fails.

# File lib/image_metadata_scraper.rb, line 27
# Scrapes image metadata for the given address.
#
# The address is normalized with http_url and passed through redirect_from.
# The first SCRAPERS entry whose pattern matches the resulting URL supplies
# the handler (the entry's last element), which is called with that URL.
#
# url - the address to scrape.
#
# Returns the handler's result, or nil when the address cannot be
# normalized, no pattern matches, or the matching entry has no handler.
def self.scrape(url)
  normalized = http_url(url)
  return unless normalized

  resolved = redirect_from(normalized)

  entry = SCRAPERS.detect { |pattern, _handler| resolved =~ pattern }
  handler = entry&.last
  return unless handler

  handler.call(resolved)
end