class DoctorFinder::Scraper

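The Scraper class fetches doctor search-result and profile pages over HTTP and parses them with Nokogiri to populate DoctorFinder::Doctor objects.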

Constants

BASE_URL

Public Class Methods

scrape_by_zipcode(zipcode) click to toggle source
# File lib/doctor_finder/scraper.rb, line 9
# Fetches the search-results page for +zipcode+ and creates one
# DoctorFinder::Doctor per '.js-prof-row-container' row, filling in its
# name, speciality, profile URL and address parts.
#
# @param zipcode [#to_s] value sent as the +address+ query parameter
# @return whatever DoctorFinder::Doctor.all returns (presumably every Doctor
#   created so far — confirm against DoctorFinder::Doctor)
# @raise [OpenURI::HTTPError, SocketError] when the page cannot be fetched
def self.scrape_by_zipcode(zipcode)
  # Escape the caller-supplied value so characters such as "&" or a space
  # cannot alter or break the query string.
  address_param = URI.encode_www_form_component(zipcode)
  search_url = "#{BASE_URL}search?address=#{address_param}&insurance_carrier=-1&day_filter=AnyDay&gender=-1&language=-1&offset=0&insurance_plan=-1&reason_visit=75&after_5pm=false&before_10am=false&sees_children=false&sort_type=Default&dr_specialty=153&"

  # URI#open instead of Kernel#open: open-uri no longer patches Kernel#open
  # as of Ruby 3.0. The block form closes the response once it is parsed.
  html = URI.parse(search_url).open { |page| Nokogiri::HTML(page) }

  html.css('.js-prof-row-container').each do |doctor| # one Doctor per result row
    doc = DoctorFinder::Doctor.new

    profile_link = doctor.css('.js-profile-link')
    doc.name = profile_link.text.strip.gsub("\n", ' ').squeeze(' ')
    doc.speciality = doctor.css('.ch-prof-row-speciality').text.strip

    # A row without a profile link leaves url unset instead of raising.
    href = profile_link.first && profile_link.first['href']
    doc.url = BASE_URL + href if href

    address = doctor.css('.js-search-prof-row-address').text.strip
    # Street: a leading house number followed by words, up to the end of that line.
    doc.street = address.slice(/^\d+[ ][\w+[ ]]+/)
    # City: the text between a run of 2+ spaces and the comma; chop drops the
    # comma. Safe navigation keeps an unmatched address from raising on nil.
    doc.city = address[/[ ][ ]+[\w+[.]*[ ]]*[,]/]&.strip&.chop

    # Prefer the "ST 12345" pair that follows the city's comma, so capitals or
    # digit runs in the street line (e.g. "NW", a 5-digit house number) are not
    # mistaken for state/zip. Assumes a "City, ST 12345" layout; when that
    # does not match, fall back to the first match anywhere in the address.
    state_zip = address.match(/,\s*([A-Z]{2})\s+(\d{5})/)
    doc.state = state_zip ? state_zip[1] : address[/[A-Z][A-Z]/]
    doc.zip = state_zip ? state_zip[2] : address[/\d{5}/]
  end

  DoctorFinder::Doctor.all
end
scrape_for_details(doctor) click to toggle source
# File lib/doctor_finder/scraper.rb, line 26
# Fetches the page at +doctor.url+ and stores the professional statement and
# the specialty list on +doctor+.
#
# @param doctor [#url, #details=, #areas=] object to enrich; mutated in place
# @return the same +doctor+ object
# @raise [URI::InvalidURIError] when +doctor.url+ is nil or not a valid URI
# @raise [OpenURI::HTTPError, SocketError] when the page cannot be fetched
def self.scrape_for_details(doctor)
  # URI#open instead of Kernel#open: open-uri no longer patches Kernel#open
  # as of Ruby 3.0. The block form closes the response once it is parsed.
  html = URI.parse(doctor.url).open { |page| Nokogiri::HTML(page) }

  statement = html.css('.profile-professional-statement').text.squeeze(' ')
  # Substitute a placeholder when the statement is blank or whitespace-only.
  doctor.details = statement.strip.empty? ? "No further details were available." : statement

  # Collapse repeated spaces, then merge the blank-line separators between
  # specialty items into single line breaks.
  doctor.areas = html.css('li.specialty').text.squeeze(" ").gsub("\r\n \r\n ", "\r\n").lstrip
  doctor
end