module Janis

TODO: Sites still to be supported for scraping: incloak.es/proxy-list/, spys.ru/free-proxy-list/, www.samair.ru/proxy/, www.proxys.com.ar/

Constants

IP_PORT_SEPARATOR
VERSION

Public Class Methods

find(amount, opts = {}) click to toggle source
# File lib/janis.rb, line 17
# Collects up to +amount+ proxies from the supported proxy-list websites.
#
# Websites are visited in order and parsing stops as soon as at least
# +amount+ entries have been gathered; the list is then truncated to
# +amount+ entries. A non-positive +amount+ yields an empty list.
#
# amount - maximum number of proxies to return.
# opts   - optional Hash:
#          :websites - list of website names to scrape; each must be one of
#                      Janis.supported_websites. Defaults to all of them.
#          :criteria - when present, the truncated list is passed through
#                      Janis::Testing.filter_results together with this value,
#                      so fewer than +amount+ entries may come back.
#
# Returns an Array of hashes as produced by build_proxy_hash (or whatever
# Janis::Testing.filter_results returns when :criteria is given).
# Raises RuntimeError if a requested website is not supported.
def self.find(amount, opts = {})
  supported = Janis.supported_websites
  requested = opts[:websites]

  # Makes sure opts[:websites] is a subset of the supported websites.
  # Otherwise, it takes the whole list.
  if requested
    requested.each do |website|
      raise "#{website} is not supported!" unless supported.include?(website)
    end
  end
  websites = requested || supported

  # Array#first rejects negative sizes, so clamp to zero.
  limit = [amount, 0].max

  total_results = []
  websites.each do |website|
    # Do not hit further websites once enough entries were collected.
    break if total_results.size >= limit

    new_results = Parsing.parse_from(website).map { |entry| build_proxy_hash(entry, website) }
    total_results.concat(new_results)
  end

  selected = total_results.first(limit)
  criteria = opts[:criteria]
  criteria ? Janis::Testing.filter_results(criteria, selected) : selected
end
supported_websites() click to toggle source
# File lib/janis.rb, line 40
# Lists the website names that can be scraped: one name per subclass of
# Janis::Parsing::SpecificParsers::ProxyWebsiteParser, derived from the
# subclass name via website_name_for.
def self.supported_websites
  parser_classes = Janis::Parsing::SpecificParsers::ProxyWebsiteParser.subclasses
  parser_classes.map do |parser_class|
    self.website_name_for(parser_class.to_s)
  end
end

Private Class Methods

build_proxy_hash(proxy_string, website) click to toggle source
# File lib/janis.rb, line 46
# Turns a raw proxy string into a hash describing the proxy.
#
# proxy_string - String split on IP_PORT_SEPARATOR; the first piece becomes
#                :ip and the last piece becomes :port (presumably of the form
#                "ip<separator>port" - confirm against the parsers).
# website      - stored unchanged under :source.
#
# Returns a Hash with the keys :ip, :port and :source.
def self.build_proxy_hash(proxy_string, website)
  pieces = proxy_string.split(IP_PORT_SEPARATOR)
  { ip: pieces.first, port: pieces.last, source: website }
end
website_name_for(parser_klass_name) click to toggle source

TODO: This should probably be moved to a name helper module.

# File lib/janis.rb, line 55
# Converts a parser class name to a :symbol_in_snake_case website name.
#
# The namespace separators are turned into slashes, CamelCase words are
# separated with underscores, every "_Parser" fragment is dropped, and the
# last slash-separated segment is downcased and symbolized.
#
# parser_klass_name - String such as "Janis::Foo::HideMyAssParser".
#
# Returns a Symbol, e.g. :hide_my_ass.
def self.website_name_for(parser_klass_name)
  slashed = parser_klass_name.gsub(/::/, '/')
  # Break acronym runs first ("HTTPProxy" -> "HTTP_Proxy"), then ordinary
  # lower/digit-to-upper boundaries ("HideMy" -> "Hide_My").
  acronyms_split = slashed.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
  words_split = acronyms_split.gsub(/([a-z\d])([A-Z])/, '\1_\2')
  without_suffix = words_split.tr("-", "_").gsub("_Parser", "")
  without_suffix.split('/').last.downcase.to_sym
end