class Janis::Parsing::SpecificParsers::ProxyListOrgParser

Public Class Methods

new()
# File lib/janis/specific_parsers/proxy-list_org.rb, line 19
# Builds the parser and eagerly loads the first page of the proxy list.
#
# The steps run in a fixed order: superclass initialisation, Capybara
# configuration, session creation, navigation, and finally parsing of the
# fetched markup into @html_doc (see obtain_html_doc).
#
# NOTE(review): `url` is called on the instance here, while the only `url`
# visible in this file is a class method — presumably an instance-level `url`
# is provided elsewhere (e.g. by the superclass); confirm.
# NOTE(review): `new_session` is not defined in the visible source; presumably
# it returns a Capybara session — confirm.
def initialize
  super
  configure_capybara
  # Keep the session on the instance: #parse reuses it to follow pagination links.
  @session = new_session 
  @session.visit(url)
  # Parse the page that was just visited so that #rows can read it right away.
  obtain_html_doc
end
url()
# File lib/janis/specific_parsers/proxy-list_org.rb, line 15
# Address of the site this parser scrapes.
#
# @return [String] the site's root URL
def self.url
  'http://proxy-list.org'
end

Public Instance Methods

configure_capybara()
# File lib/janis/specific_parsers/proxy-list_org.rb, line 27
# Sets Capybara's +app_host+ to this parser's URL.
#
# NOTE(review): `url` is resolved on the instance; the only `url` visible in
# this file is a class method, so an instance-level `url` is presumably
# defined elsewhere — confirm.
#
# @return whatever Capybara.configure returns
def configure_capybara
  Capybara.configure do |config|
    config.app_host = url
  end
end
parse()
# File lib/janis/specific_parsers/proxy-list_org.rb, line 31
# Collects the proxy entries from ten consecutive result pages.
#
# Starts with the rows of the currently parsed document, then follows the
# pagination links labelled "2" through "10" on @session, re-parsing the
# document after every click and appending that page's rows.
#
# @return [Array] the +proxy+ value of every collected row, in page order
def parse
  total_rows = []
  total_rows.concat(rows)

  # The first page is already loaded; the remaining pages are reached through
  # links whose visible text is the page number.
  (2..10).each do |page_number|
    @session.click_link(page_number.to_s)
    obtain_html_doc
    total_rows.concat(rows)
  end

  # TODO: This map is here to adapt #parse output to the one expected by Janis.find.
  # Remove this when it starts accepting more info about each proxy server.
  total_rows.map(&:proxy)
end

Private Instance Methods

obtain_html_doc()
# File lib/janis/specific_parsers/proxy-list_org.rb, line 50
# Re-parses the session's current HTML and stores the result in @html_doc.
#
# Must be called again after every navigation on @session, otherwise
# @html_doc keeps describing the previous page.
#
# @return the freshly parsed document (also assigned to @html_doc)
def obtain_html_doc
  page_source = @session.html
  @html_doc = Nokogiri.HTML(page_source)
end
rows()
# File lib/janis/specific_parsers/proxy-list_org.rb, line 54
# Extracts one Struct::Row per proxy entry found in @html_doc.
#
# An entry is any <ul> element whose serialised markup contains three
# consecutive digits followed by a dot (presumably the start of an IP
# address — confirm against the site's markup).
#
# NOTE(review): Struct::Row is defined outside the visible source; the
# positional arguments below are assumed to match its member order — confirm.
#
# @return [Array<Struct::Row>] one row per matching <ul>, in document order
def rows
  proxy_lists = @html_doc.css('ul').select { |ul| ul.to_s.match(/\d\d\d\./) }

  proxy_lists.map do |row_html|
    # TODO: This should be an actual class, and should have methods to retrieve all attributes.
    Struct::Row.new(
      # Only the last child node of the .proxy element carries the address text.
      row_html.css('.proxy').children.last.text,
      row_html.css('.country').text,
      row_html.css('.city').text,
      row_html.css('.type').text,
      row_html.css('.speed').text,
      row_html.css('.https').text
    )
  end
end