module MxdTracker::Scraper
Public Class Methods
get_status(path, zip_code)
click to toggle source
# File lib/mxd_tracker/scraper.rb, line 7
# Scrapes the tracking page found at +path+ and returns the scraped data
# after it has been run through sanitize_data with +zip_code+.
#
# path     - passed unchanged to scrape.
# zip_code - passed unchanged to sanitize_data.
#
# Returns whatever sanitize_data returns for the scraped data.
def self.get_status(path, zip_code)
  sanitize_data(scrape(path), zip_code)
end
sanitize_data(data, zip_code)
click to toggle source
# File lib/mxd_tracker/scraper.rb, line 12
# Normalizes a scraped +data+ hash in place and returns that same hash.
#
# * When "current_status" is present, it is reduced to the text before the
#   first ":" (with "Picked up date" rewritten to "Picked Up"), and the date
#   pulled from the original status is converted via convert_time_to_utc and
#   stored, stringified, under "current_status_timestamp".
# * When "progress_details" is present, each entry's "date" is replaced by
#   the stringified result of convert_time_to_utc.
# * When "error" is nil or an empty string, "order_number",
#   "delivery_address" and "pick_up_address" are passed through
#   clean_up_strings.
#
# data     - hash-like object read and written with String keys.
# zip_code - forwarded to convert_time_to_utc.
def self.sanitize_data(data, zip_code)
  raw_status = data["current_status"]
  if raw_status.present?
    label = raw_status.split(":").first.strip
    timestamp = convert_time_to_utc(pull_date_from_status(raw_status), zip_code)

    data["current_status"] = label == "Picked up date" ? "Picked Up" : label
    data["current_status_timestamp"] = timestamp.to_s
  end

  detail_rows = data["progress_details"]
  if detail_rows.present?
    detail_rows.each do |row|
      row["date"] = convert_time_to_utc(row["date"], zip_code).to_s
    end
  end

  failure = data["error"]
  if failure.nil? || failure == ""
    # Only tidy the descriptive fields when the page reported no error.
    %w[order_number delivery_address pick_up_address].each do |field|
      data[field] = clean_up_strings(data[field])
    end
  end

  data
end
Private Class Methods
clean_up_strings(str)
click to toggle source
# File lib/mxd_tracker/scraper.rb, line 40
# Collapses runs of whitespace in +str+ to single spaces, keeps only the text
# after the last ":" and removes any leading whitespace from what remains.
#
# str - String to tidy, or nil.
#
# Returns the cleaned String. Returns nil when +str+ is nil and "" when +str+
# is empty or whitespace-only; both cases previously raised NoMethodError
# because there was no segment after splitting on ":".
def self.clean_up_strings(str)
  return str if str.nil?

  collapsed = str.split.join(' ')
  # split(':') yields [] for an empty string, so guard the nil from #last.
  value = collapsed.split(':').last.to_s
  # The pattern is anchored at the start, so a single substitution suffices.
  value.sub(/\A\p{Space}+/, '')
end
convert_time_to_utc(date, zip_code)
click to toggle source
# File lib/mxd_tracker/scraper.rb, line 58
# Convenience wrapper that hands +date+ and +zip_code+ to
# MxdTracker::TimeConverter.convert_time_to_utc and returns its result
# unchanged.
def self.convert_time_to_utc(date, zip_code)
  converter = MxdTracker::TimeConverter
  converter.convert_time_to_utc(date, zip_code)
end
pull_date_from_status(current_status)
click to toggle source
# File lib/mxd_tracker/scraper.rb, line 44
# Extracts the date portion of a scraped status string.
#
# When the status contains a long-form date such as
# "February 12, 2016 at 10:30 AM" (month name, day followed by a comma,
# four-digit year, some text, a clock time and AM/PM), that date is parsed
# and returned formatted as '%Y-%m-%d %H:00' (minutes are dropped).
# Otherwise the text after the last ": " and then after the last ", " is
# returned, stripped.
#
# The month alternation previously contained a stray "<" before "Jan" and
# "\s>" after "Dec" and had no entry for May, so January, May and December
# statuses could never match; all twelve months are now recognised.
#
# current_status - status text; nil is treated as an empty string.
#
# Returns a String.
def self.pull_date_from_status(current_status)
  status_text = current_status.to_s
  months = 'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec'
  date_regex = /\b(?:#{months})\w* [0-9]{1,2}, [0-9]{4} .+ [0-2]?[0-9]:[0-5]?[0-9] (?:AM|PM)/

  raw_date = status_text[date_regex]
  if raw_date
    begin
      return Time.parse(raw_date).strftime('%Y-%m-%d %H:00')
    rescue ArgumentError
      # Text looked like a date but could not be parsed; use the fallback.
    end
  end

  # to_s guards the nil that #last returns when a split yields no pieces.
  status_text.split(': ').last.to_s.split(', ').last.to_s.strip
end
scrape(url_path)
click to toggle source
# File lib/mxd_tracker/scraper.rb, line 62
# Crawls the tracking page at +url_path+ (relative to the hard-coded
# homedirectusa.com base URL) with Wombat and returns the crawl result.
#
# Properties requested from the page: error, current_status, waybill_number,
# tracking_number, order_number, progress_details (date / activity /
# location per table row), pick_up_address and delivery_address. The
# current_status, waybill_number and address values are passed through
# Sanitize.fragment; all but waybill_number are also stripped.
#
# NOTE(review): the result is presumably a Hash keyed by property name, as
# sanitize_data reads it that way; confirm against the Wombat documentation.
def self.scrape(url_path)
  Wombat.crawl do
    base_url "http://www.homedirectusa.com/hdusaoms"
    path url_path

    error "css=#body_mxdcontainerTrack h3", :html

    current_status "css=.progress_details .delivered", :html do |markup|
      Sanitize.fragment(markup).strip
    end

    waybill_number "css=.reference_information .col tr:nth-child(1) td:nth-child(3) p", :html do |markup|
      Sanitize.fragment(markup)
    end
    tracking_number "css=.reference_information .col tr:nth-child(2) td:nth-child(3) p", :html
    order_number "css=.reference_information .col2 p"

    progress_details "css=.shipment_details_inner tbody tr", :iterator do |row|
      date "css=td:nth-child(1)"
      activity "css=td:nth-child(2)"
      location "css=td:nth-child(3)"
    end

    pick_up_address "css=#order_status_div .col p", :html do |markup|
      Sanitize.fragment(markup).strip
    end

    delivery_address "css=#order_status_div .col.right p:nth-child(1)", :html do |markup|
      Sanitize.fragment(markup).strip
    end
  end
end