class Scraper

A simple, reusable scraper class that calls Nokogiri and dumps the requested site.

Constants

CACHE_LOCATIONS
CHORUS_CHAMPS_SITE
LOCAL_SITES
LOCATIONS

site storage

QUARTET_CHAMPS_SITE

Public Class Methods

load_cache() click to toggle source

The scraper should know what it is scraping, but it should not need to know how the data classes are structured.

# File lib/barbershop_contestants/scraper.rb, line 57
# Placeholder for loading a previously cached copy of a page.
#
# Caching is not implemented yet: the method always returns nil, so any
# `load_cache || something` expression falls through to its right-hand side.
def self.load_cache
  # Sketch of the intended implementation (not working yet):
  #   loaded = {}
  #   CACHE_LOCATIONS.each do |key, loc|
  #     loaded[key] = fopen(loc) # i.e. load the copy stored at loc
  #   end
  #
  # Still to be figured out. Nokogiri calls to try for saving/restoring a page:
  #   doc = Nokogiri(string_or_io)
  #   node.write_to(io, *options)
  #   # or
  #   node.to_s / .to_html / .to_xml
  nil
end
scrape_and_create_chorus_champs(source) click to toggle source
# File lib/barbershop_contestants/scraper.rb, line 114
# Scrapes the chorus champions rows and records one Performance per row.
#
# Each row's text is split on newlines; the pieces at positions 1 through 6
# are used as year, name, hometown/district, director, number on stage and
# score. Position 0 is ignored.
#
# source is handed straight to scrape_chorus_champs.
# Returns the result of the trailing screen-clear command.
def self.scrape_and_create_chorus_champs(source)
  scrape_chorus_champs(source).each do |row|
    year, name, hometown, director, on_stage, score =
      row.text.split("\n").values_at(1, 2, 3, 4, 5, 6)

    attributes = {
      year: year.to_i,
      name: name,
      hometown_and_district: hometown,
      director: director,
      number_on_stage: on_stage,
      score: score,
      place: 1, # champions are first place by definition
      type: "chorus"
    }
    Performance.find_or_create(attributes, "chorus")
  end

  # Wipe the terminal: try "clear" first and fall back to "cls" if it fails.
  system("clear") || system("cls")
end
scrape_and_create_quartet_champs(source) click to toggle source
# File lib/barbershop_contestants/scraper.rb, line 85
# Scrapes the quartet champions rows and records one Performance per row.
#
# Each row's text is split on newlines; the pieces at positions 1-5 and 7
# are used as year, name, score, district, comments and members. Positions
# 0 and 6 are ignored.
#
# source is handed straight to scrape_quartet_champs.
# Returns the result of the trailing screen-clear command.
def self.scrape_and_create_quartet_champs(source)
  scrape_quartet_champs(source).each do |row|
    year, name, score, district, comments, members =
      row.text.split("\n").values_at(1, 2, 3, 4, 5, 7)

    attributes = {
      year: year.to_i,
      name: name,
      score: score,
      district: district,
      comments: comments,
      members: members,
      place: 1, # champions are first place by definition
      type: "quartet"
    }
    Performance.find_or_create(attributes, "quartet")
  end

  # Wipe the terminal: try "clear" first and fall back to "cls" if it fails.
  system("clear") || system("cls")
end
scrape_and_create_year(source, year, type) click to toggle source
# File lib/barbershop_contestants/scraper.rb, line 135
# Scrapes one contest year and records a Performance for every result row.
#
# A year already listed in @years_scraped[type] is skipped and true is
# returned immediately. Otherwise the year is recorded as scraped *before*
# the scrape runs, every row of every table returned by scrape_year is
# turned into a Performance, and the screen is cleared.
#
# Row text is split on newlines; positions 1-4 are place, name, district and
# score. Position 5 is stored as :number_on_stage only when type is "chorus".
#
# Returns true when the year was skipped, otherwise the result of the
# trailing screen-clear command.
def self.scrape_and_create_year(source, year, type)
  already_scraped = @years_scraped[type]
  return true if already_scraped.include?(year)

  already_scraped << year

  scrape_year(source, year, type).each do |table_rows|
    table_rows.each do |row|
      place, name, district, score, on_stage =
        row.text.split("\n").values_at(1, 2, 3, 4, 5)

      attributes = {
        year: year,
        place: place,
        name: name,
        district: district,
        score: score
      }
      attributes[:number_on_stage] = on_stage if type == "chorus"
      Performance.find_or_create(attributes, type)
    end
  end

  # Wipe the terminal: try "clear" first and fall back to "cls" if it fails.
  system("clear") || system("cls")
end
scrape_chorus_champs(source) click to toggle source
# File lib/barbershop_contestants/scraper.rb, line 105
# Fetches the chorus champions page and returns its data rows.
#
# The page location is the :base entry for source followed by the :c_champs
# entry for source, both taken from LOCATIONS.
#
# Returns the "tr" rows of the second ".wikitable" in the document, with the
# first (header) row removed.
def self.scrape_chorus_champs(source)
  puts "Scraping Chorus Champs"

  base = LOCATIONS[:base][source]
  page = LOCATIONS[:c_champs][source]
  doc = load_cache || scrape_or_load(base + page)

  # Index 1 picks the second wikitable; shift drops its header row in place.
  doc.css(".wikitable")[1].css("tr").tap(&:shift)
end
scrape_or_load(page) click to toggle source
# File lib/barbershop_contestants/scraper.rb, line 50
# Returns a document for page, preferring a cached copy when one exists.
#
# page is the location to read when nothing is cached. It is handed to
# URI.open, which handles URLs itself and defers to Kernel#open for anything
# else (for example a local file path).
#
# Returns whatever load_cache returns when that is truthy; otherwise the
# result of Nokogiri::HTML for the content read from page.
def self.scrape_or_load(page)
  cached = load_cache
  return cached if cached

  # Loaded here so this method works even if nothing else has required
  # open-uri yet; repeated requires are no-ops.
  require "open-uri"

  # URI.open rather than bare open: open-uri no longer patches Kernel#open
  # to accept URLs on Ruby 3.0+. The block form closes the underlying handle
  # once parsing is done instead of leaking it.
  URI.open(page) { |io| Nokogiri::HTML(io) }
end
scrape_quartet_champs(source) click to toggle source
# File lib/barbershop_contestants/scraper.rb, line 72
# Fetches the quartet champions page and returns its data rows.
#
# The page location is the :base entry for source followed by the :q_champs
# entry for source, both taken from LOCATIONS.
#
# Returns every node matching ".wikitable tbody tr", minus the first match.
def self.scrape_quartet_champs(source)
  puts "Scraping Quartet Champs"

  base = LOCATIONS[:base][source]
  page = LOCATIONS[:q_champs][source]
  doc = load_cache || scrape_or_load(base + page)
  # TODO: reinstate real scraping functionality when in wifi

  # The first matched row holds the headers; CSS alone could not single it
  # out, so it is dropped in place here.
  doc.css(".wikitable tbody tr").tap(&:shift)
end
scrape_year(source, year, type) click to toggle source
# File lib/barbershop_contestants/scraper.rb, line 160
# Fetches the contest page for one year and returns its result rows by table.
#
# The page location is built from LOCATIONS: the :base entry for source,
# followed by the :<first letter of type>_year entry for source (a list of
# fragments) joined with the year.
#
# Returns an Array with one entry per ".wikitable" whose first row does not
# mention "Admin"; each entry holds that table's "tr" rows without the first
# one.
def self.scrape_year(source, year, type)
  puts "Scraping #{type.capitalize} Contest for #{year}"

  base = LOCATIONS[:base][source]
  page_key = (type[0] + "_year").to_sym
  page = LOCATIONS[page_key][source].join(year.to_s)
  doc = load_cache || scrape_or_load(base + page)

  doc.css(".wikitable")
     .reject { |table| table.css("tr").first.text.include?("Admin") }
     .map { |table| table.css("tr").drop(1) }
end