module MxdTracker::Scraper

Public Class Methods

get_status(path, zip_code) click to toggle source
# File lib/mxd_tracker/scraper.rb, line 7
# Scrapes the tracking page at +path+ and returns the sanitized result.
#
# @param path [String] URL path handed to +scrape+
# @param zip_code [String] passed through to +sanitize_data+ for the
#   time conversion it performs
# @return [Object] whatever +sanitize_data+ returns (the scraped data after
#   clean-up)
def self.get_status(path, zip_code)
  sanitize_data(scrape(path), zip_code)
end
sanitize_data(data, zip_code) click to toggle source
# File lib/mxd_tracker/scraper.rb, line 12
# Normalises a scraped tracking hash in place and returns it.
#
# * When "current_status" is present, it is reduced to the label before the
#   first ":" (with "Picked up date" rewritten to "Picked Up") and the date
#   pulled from the original text is converted and stored under
#   "current_status_timestamp".
# * Every entry of "progress_details" gets its "date" converted.
# * When no "error" was scraped (nil or ""), the order number and both
#   addresses are run through +clean_up_strings+.
#
# @param data [Hash] scraped values keyed by string property names
# @param zip_code [String] forwarded to +convert_time_to_utc+
# @return [Hash] the same +data+ object, mutated
def self.sanitize_data(data, zip_code)
  raw_status = data["current_status"]
  if raw_status.present?
    label = raw_status.split(":").first.strip
    status_date = pull_date_from_status(raw_status)
    utc_stamp = convert_time_to_utc(status_date, zip_code)
    data["current_status"] = label == "Picked up date" ? "Picked Up" : label
    data["current_status_timestamp"] = "#{utc_stamp}"
  end

  events = data["progress_details"]
  if events.present?
    events.each do |event|
      event["date"] = "#{convert_time_to_utc(event['date'], zip_code)}"
    end
  end

  # Leave the descriptive fields untouched when the page reported an error.
  failure = data["error"]
  return data unless failure.nil? || failure == ""

  %w[order_number delivery_address pick_up_address].each do |field|
    data[field] = clean_up_strings(data[field])
  end
  data
end

Private Class Methods

clean_up_strings(str) click to toggle source
# File lib/mxd_tracker/scraper.rb, line 40
# Collapses whitespace in a scraped field and strips any "Label:" prefix.
#
# Runs of whitespace are squeezed to single spaces, then only the text after
# the last ":" is kept, with leading (Unicode) whitespace removed. A string
# ending in ":" has no trailing segment, so the text before the colon is
# returned.
#
# @param str [String, nil] raw scraped text; nil is treated as empty
# @return [String] the cleaned value, or "" when +str+ is nil, empty or
#   whitespace-only (these inputs used to raise NoMethodError)
def self.clean_up_strings(str)
  collapsed = str.to_s.split.join(" ")
  # "".split(":") is [], so +last+ can be nil; coerce before stripping.
  value = collapsed.split(":").last.to_s
  value.sub(/\A\p{Space}*/, "")
end
convert_time_to_utc(date, zip_code) click to toggle source
# File lib/mxd_tracker/scraper.rb, line 58
# Delegates to MxdTracker::TimeConverter.convert_time_to_utc.
#
# @param date [Object] date value forwarded unchanged
# @param zip_code [Object] forwarded unchanged; presumably selects the
#   source time zone - confirm against TimeConverter
# @return [Object] whatever the converter returns
def self.convert_time_to_utc(date, zip_code)
  converter = MxdTracker::TimeConverter
  converter.convert_time_to_utc(date, zip_code)
end
pull_date_from_status(current_status) click to toggle source
# File lib/mxd_tracker/scraper.rb, line 44
# Extracts the date portion of a scraped status string.
#
# Searches +current_status+ for a timestamp shaped like
# "<Month> <day>, <year> <anything> <h:mm> AM|PM". When one is found and
# Time.parse accepts it, the result is formatted as 'YYYY-MM-DD HH:00' (the
# format string hard-codes the minutes to 00). Otherwise the text after the
# last ": " and the last ", " is returned, stripped.
#
# @param current_status [String] non-empty status text
# @return [String] formatted timestamp, or the trailing fragment of the
#   status when no parsable timestamp is present
def self.pull_date_from_status(current_status)
  # All twelve months are listed; \w* accepts both short and full names, and
  # "May" needs no suffix. No literal delimiters surround the month names.
  date_regex = /(Jan\w*|Feb\w*|Mar\w*|Apr\w*|May|Jun\w*|Jul\w*|Aug\w*|Sep\w*|Oct\w*|Nov\w*|Dec\w*) ([0-9]{1,2},) ([0-9]{4}) .+ ([0-2]?[0-9]:[0-5]?[0-9]) (AM|PM)/
  raw_date = current_status[date_regex]

  if raw_date
    begin
      return Time.parse(raw_date).strftime('%Y-%m-%d %H:00')
    rescue ArgumentError
      # The pattern admits values Time.parse rejects (e.g. hour 29); use the
      # plain-text fallback below instead of failing.
    end
  end

  current_status.split(": ").last.split(", ").last.strip
end
scrape(url_path) click to toggle source
# File lib/mxd_tracker/scraper.rb, line 62
# Crawls a Home Direct USA tracking page with Wombat and returns the result
# of Wombat.crawl.
#
# The block declares one property per piece of data, each located by a CSS
# selector. Properties declared with a block post-process the selected HTML
# through Sanitize.fragment.
#
# NOTE(review): callers in this file index the result with string keys such
# as "current_status" and "progress_details", so the return value is
# presumably a Hash keyed by property name - confirm against Wombat.
#
# @param url_path [String] path appended to the base URL below
# @return [Object] the value returned by Wombat.crawl
def self.scrape(url_path)
  tracked_page = Wombat.crawl do
    base_url "http://www.homedirectusa.com/hdusaoms"
    path url_path
    # Heading shown inside the tracking container; sanitize_data treats a
    # nil or empty value here as "no error".
    error "css=#body_mxdcontainerTrack h3", :html
    # Status text with markup removed and surrounding whitespace stripped.
    current_status "css=.progress_details .delivered", :html do |status|
      Sanitize.fragment(status).strip
    end
    # Markup removed, but not stripped.
    waybill_number "css=.reference_information .col tr:nth-child(1) td:nth-child(3) p", :html do |waybill|
      Sanitize.fragment(waybill)
    end
    # Raw HTML, no post-processing.
    tracking_number "css=.reference_information .col tr:nth-child(2) td:nth-child(3) p", :html
    order_number "css=.reference_information .col2 p"
    # One entry per matched table row, each with date/activity/location
    # taken from the first three cells.
    progress_details "css=.shipment_details_inner tbody tr", :iterator do |details|
      date "css=td:nth-child(1)"
      activity "css=td:nth-child(2)"
      location "css=td:nth-child(3)"
    end
    pick_up_address "css=#order_status_div .col p", :html do |address|
      Sanitize.fragment(address).strip
    end
    delivery_address "css=#order_status_div .col.right p:nth-child(1)", :html do |address|
      Sanitize.fragment(address).strip
    end
  end
end