class Janis::Parsing::SpecificParsers::ProxyListOrgParser
Public Class Methods
new()
click to toggle source
Calls superclass method
Janis::Parsing::SpecificParsers::ProxyWebsiteParser::new
# File lib/janis/specific_parsers/proxy-list_org.rb, line 19 def initialize super configure_capybara @session = new_session @session.visit(url) obtain_html_doc end
url()
click to toggle source
# File lib/janis/specific_parsers/proxy-list_org.rb, line 15 def self.url 'http://proxy-list.org' end
Public Instance Methods
configure_capybara()
click to toggle source
# File lib/janis/specific_parsers/proxy-list_org.rb, line 27
# Assigns `url` to Capybara's app_host setting through Capybara.configure.
def configure_capybara
  Capybara.configure do |config|
    config.app_host = url
  end
end
parse()
click to toggle source
# File lib/janis/specific_parsers/proxy-list_org.rb, line 31
# Collects the proxy rows of the page currently loaded in @session, then of
# each page reached by asking the session to click the links "2" through "10".
#
# After every click the HTML document is re-read with obtain_html_doc so that
# `rows` works on the page that was just opened.
#
# Returns an Array with the `proxy` attribute of every collected row, in the
# order the pages were visited.
def parse
  collected = []
  collected.concat(rows)

  (2..10).each do |page_number|
    @session.click_link(page_number.to_s)
    obtain_html_doc
    collected.concat(rows)
  end

  # TODO: This map is here to adapt #parse output to the one expected by
  # Janis.find. Remove this when it starts accepting more info about each
  # proxy server.
  collected.map(&:proxy)
end
Private Instance Methods
obtain_html_doc()
click to toggle source
# File lib/janis/specific_parsers/proxy-list_org.rb, line 50
# Reads the session's current HTML and stores the Nokogiri-parsed document
# in @html_doc. Returns that document.
def obtain_html_doc
  markup = @session.html
  @html_doc = Nokogiri::HTML(markup)
end
rows()
click to toggle source
# File lib/janis/specific_parsers/proxy-list_org.rb, line 54
# Builds one Struct::Row for every proxy entry found in @html_doc.
#
# Only <ul> elements whose serialized markup contains three digits followed by
# a dot are kept.
# NOTE(review): presumably this is meant to spot the start of an IP address;
# confirm against the site's markup.
#
# Returns an Array of Struct::Row. Each row is built positionally from the text
# of the last child of `.proxy`, then the text of `.country`, `.city`, `.type`,
# `.speed` and `.https`, in that order.
def rows
  proxy_lists = @html_doc.css('ul').select { |ul| ul.to_s.match?(/\d\d\d\./) }

  proxy_lists.map do |row_html|
    # TODO: This should be an actual class, and should have methods to
    # retrieve all attributes.
    Struct::Row.new(
      row_html.css('.proxy').children.last.text,
      row_html.css('.country').text,
      row_html.css('.city').text,
      row_html.css('.type').text,
      row_html.css('.speed').text,
      row_html.css('.https').text
    )
  end
end