class ScraperRb::Scraper

Constants

VALID_PARAMS

Attributes

options[RW]
response[RW]

Public Class Methods

new(url, params, extra_headers, timeout) click to toggle source
# File lib/scraper_rb.rb, line 33
# Builds the request options for a single scrape call.
#
# @param url [String] the page to scrape; always sent as the +url+ query parameter
# @param params [Hash, nil] optional query parameters; only entries whose name
#   appears in VALID_PARAMS are kept. Keys may be Strings or Symbols.
# @param extra_headers [Hash, nil] headers merged over the defaults, so they can
#   override +Accept+ or +apikey+
# @param timeout [Numeric] stored under +options[:request][:timeout]+
def initialize(url, params, extra_headers, timeout)
  default_headers = {
    'Accept' => 'application/json',
    'apikey' => ENV['PROMPTAPI_TOKEN'],
  }
  default_headers.merge!(extra_headers) if extra_headers

  @options = {
    url: ENV['PROMPTAPI_TEST_ENDPOINT'] || 'https://api.promptapi.com/scraper',
    params: {url: url},
    request: {timeout: timeout},
    headers: default_headers,
  }

  # Build the whitelist once instead of re-mapping VALID_PARAMS per entry.
  allowed = VALID_PARAMS.map(&:to_s)
  (params || {}).each do |key, value|
    name = key.to_s
    # Compare by name so 'country' and :country are both accepted; keys are
    # stored as Symbols to match the existing :url entry.
    @options[:params][name.to_sym] = value if allowed.include?(name)
  end

  @response = {}
end

Public Instance Methods

get() click to toggle source
# File lib/scraper_rb.rb, line 62
# Performs the scrape request and stores the outcome in @response.
#
# On success @response holds the parsed JSON body; when that body is a Hash
# with a :"data-selector" key, its value is also copied to :data.
# On failure @response is a Hash with a single :error entry.
# Returns nil without issuing a request when no 'apikey' header is configured.
def get
  unless @options[:headers]['apikey']
    @response = {error: "You need to set PROMPTAPI_TOKEN environment variable"}
    return
  end

  conn = Faraday.new(@options) do |c|
    c.use Faraday::Response::RaiseError
    c.use CustomURLMiddleware if ENV['RUBY_DEVELOPMENT']
  end

  begin
    response = conn.get
    @response = parse(response.body)
    # The parsed body is not necessarily a Hash (valid JSON may be an array),
    # so check before asking for a key.
    if @response.is_a?(Hash) && @response.key?(:"data-selector")
      @response[:data] = @response[:"data-selector"]
    end
  rescue Faraday::ConnectionFailed
    @response = {error: "Connection error"}
  rescue Faraday::TimeoutError => e
    @response = {error: e.message.capitalize}
  rescue Faraday::ClientError => e
    # A client error can be raised without an HTTP response attached; fall
    # back to the exception message instead of dereferencing nil.
    body = e.response && e.response[:body]
    @response = {error: body ? parse(body) : e.message.capitalize}
  rescue Faraday::ServerError => e
    @response = {error: e.message.capitalize}
  end
end
parse(body) click to toggle source
# File lib/scraper_rb.rb, line 54
# Decodes a JSON document, symbolizing object keys.
#
# @param body [String] raw JSON text
# @return [Object] the decoded value, or {error: "JSON decoding error"} when
#   the body is not valid JSON or is not a String at all (e.g. nil)
def parse(body)
  JSON.parse(body, symbolize_names: true)
rescue JSON::ParserError, TypeError
  # TypeError is what JSON.parse raises for nil / non-String input.
  {error: "JSON decoding error"}
end
save(filename) click to toggle source
# File lib/scraper_rb.rb, line 88
# Writes the scraped data held in @response[:data] to disk.
#
# String data is written as-is with an '.html' extension; Array data is
# serialised with JSON.generate and written with a '.json' extension. The
# extension is appended to +filename+ unless it already ends with it.
#
# @param filename [String] target path, with or without the extension
# @return [Hash] {file:, size:} on success, or {error:} when no data is
#   available or the file cannot be written
def save(filename)
  data = @response.is_a?(Hash) ? @response[:data] : nil
  return {error: 'Data is not available'} unless data

  if data.is_a?(Array)
    extension = '.json'
    payload = JSON.generate(data)
  else
    extension = '.html'
    payload = data
  end

  # File.basename strips the extension only when it matches, so the result
  # always ends with exactly one copy of it.
  target = File.join(File.dirname(filename), "#{File.basename(filename, extension)}#{extension}")

  begin
    File.open(target, 'w') { |file| file.write(payload) }
    {file: target, size: File.size(target)}
  rescue SystemCallError => e
    # Covers every Errno::* failure (missing directory, permission denied,
    # target is a directory, ...), not only ENOENT.
    {error: e.message}
  end
end