class TeaShopper::SongScraper

Constants

BASE_URL

Path definitions

INDEX_URL

Public Instance Methods

scrape_profile_page(profile_url) click to toggle source
  1. Scrape individual tea pages, such as songtea.com/collections/oolong-tea/products/dragon-phoenix-tender-heart

Example return values: self.scrape_profile_page(profile_url) {

:size=>30.0, 
:price=>19.0, 
:price_per_oz=>20.10618, 
:flavors=>"Notes of orchid, spruce, and ghee.", 
:date=>"2019", 
:region=>"Taiwan", 
Removed for now
:detailed_instructions=>"This tea accommodates a range of brew styles...", 
:instructions=>"Brew: 6 grams・150 ml・203° F・2 min", 

:description=>"2019 marks our first year offering this oolong from Taiwan’s Dragon Phoenix Gorge. The cooler temperatures and mist-shrouded gardens of this region product tea with clarity, aromatics, and texture.\nDragon Phoenix Tender Heart is produced by a small farm operated by the Zhang family..."

}

# File lib/song_scraper.rb, line 73
def scrape_profile_page(profile_url)
  profile = {}

  # Store html document
  doc = Nokogiri::HTML(open(profile_url))
  container = doc.css("div#ProductSection div.product-single")
  
  # Get first selection from size and price select list
  size_price = container.css("form#AddToCartForm option").first.text.strip.split(" - ")

  # Get size, remove g, convert to float
  size = size_price.first[/\d+/].to_f
  profile[:size] = size

  # Get price, grab digits and decimal, convert to float. If price is 0.0, replace with "Sold Out".
  price = size_price.last[/\d+./].to_f
  price = "Sold Out" if price == 0.0
  profile[:price] = price

  # Calculate price per oz from initial price and size.
  # 30g size * 0.035274 conversion * price
  # If price is sold out, set to price_per_oz, as well.
  if price.is_a?(String)
    price_per_oz = price
    profile[:price_per_oz] = price_per_oz
  else 
    price_per_oz = size * 0.035274 * price
    profile[:price_per_oz] = price_per_oz.round(2)
  end

  # Gather all description paragraphs and separate into flavors, date, region. (And instructions and detailed instructions for future.)
  desc_array = container.css("div.product-description p").collect { |p| p.text }

  # Flavors
  profile[:flavors] = desc_array.shift
   
  # Remove second paragraph and separate into region and date
  region_year = desc_array.shift.split("・")
  profile[:date] = region_year[1]

  # Region. Grab text after "from" until the end
  profile[:region] = region_year.first[/(?<=from ).*/]
 
  # Future: when separating steep instructions, activate:
    # Steep instructions
    # Get detailed instructions first
    # profile[:detailed_instructions] = desc_array.pop
    # Get summary instructions next
    # profile[:instructions] = desc_array.pop
  
  # Full description
  profile[:description] = desc_array.join("\n\n")

  return profile
end
scrape_teas() click to toggle source
  1. Scrape teas from Song Teas by Type page: songtea.com/pages/tea-by-type

Example return values: {

:name=>"Aged Baozhong, 1960s", 
:type=>"aged", 
:shop_name=>"Song Tea & Ceramics", 
:url=>"/collections/aged-tea/products/aged-baozhong-1960s", 
:stock=>""

}

# File lib/song_scraper.rb, line 18
def scrape_teas
  teas = []

  # Store html, get tea profile container
  doc = Nokogiri::HTML(open(INDEX_URL))
  tea_types = doc.css("div.product-section")

  # Get shop name from meta tag
  shop_name = ""
  doc.css('meta').each { |meta| shop_name = meta.attr("content") if meta.attr("property") == "og:site_name" }

  # Iterate through tea types, then iterate through teas to create tea hash
  tea_types.each do |type|       
    type.css(".grid__item a.grid-link").each do |tea|
      
    # Replace "red" tea type with "black/red", to remove user confusion
    tea_type = type.attr("id").split("/").last.split("-").first 
    tea_type = "black/red" if tea_type == "red"
    
    # If tea is out of stock, store in hash
    tea.css("span.badge").text.include?("Sold Out")?stock = "sold out" : stock = ""

    # Add tea hash to array
    teas <<
    {
      :name => tea.css("p.grid-link__title").text,
      :type => tea_type,
      :shop_name => shop_name,
      :url => BASE_URL + tea.attr("href"),
      :stock => stock
    }
    end
  end

  # Return array
  return teas
end