class Scrape
This gem fetches www.thestranger.com/seattle/Music and scrapes the data for all the recommended concerts. The one method you need is get_music_listings,
which returns an array of hashes of the form
- [{:title=>"", :venue=>"", :date=>"", :details=>""}, …]
-
How to use this gem:
scrape = Scrape.new
scrape.get_music_listings
That is all.
Public Class Methods
new(source_type = :web)
click to toggle source
Scrape
is meant to parse www.thestranger.com/seattle/Music and pull out info on the recommended shows. Initialize with source_type = :web (the default); the page is then fetched from “http://www.thestranger.com/seattle/Music”.
# File lib/musicscrape.rb, line 18
# Builds a scraper for The Stranger's music listings page.
#
# source_type - :web makes load_page fetch the page with RestClient; any
#               other value makes load_page read the local stranger.txt.
# url         - address fetched when source_type is :web. Defaults to the
#               address that was previously hard-coded, so existing callers
#               are unaffected.
def initialize(source_type = :web, url = "http://www.thestranger.com/seattle/Music")
  @source_type = source_type
  @url = url
end
Public Instance Methods
formatted_output()
click to toggle source
# File lib/musicscrape.rb, line 22
# Prints every listing to standard output, one field per line (title,
# venue, date, details) followed by a blank line. This is the same output
# you get from the command line.
#
# The listings come from this instance's own get_music_listings rather than
# from a freshly built Scrape, so no extra object is created and @scrape is
# no longer overwritten.
#
# Returns the array of listing hashes that was printed.
def formatted_output
  get_music_listings.each do |listing|
    [:title, :venue, :date, :details].each { |field| puts listing[field] }
    puts "\n"
  end
end
get_events(text_in)
click to toggle source
# File lib/musicscrape.rb, line 61
# Parses the pared-down HTML and extracts one hash per event.
#
# text_in - String of HTML holding the event entries.
#
# Returns an array of hashes:
#   [{:title=>"", :venue=>"", :date=>"", :details=>""}, ...]
# Every value is stripped of surrounding whitespace; :details additionally
# has its whitespace-separated words joined with commas.
def get_events(text_in)
  # Fields to extract, in page order, each with the markup that ends it.
  field_delimiters = [[:title, '</a><br/>'], [:venue, '<br />'], [:date, '<br />'], [:details, '</li>']]

  remaining = text_in
  events = []

  # Keep going while there is still an event link left in the text.
  while remaining.include?("http://www.thestranger.com/seattle/Event?event=")
    # Discard everything up to the end of the line holding the event id.
    remaining = remaining.partition(/\bevent=\b\d*.*/)[2]

    event = {}
    field_delimiters.each do |field, delimiter|
      value, _, remaining = remaining.partition(delimiter)
      event[field] = value.strip
    end
    event[:details] = event[:details].split.join(",")

    events << event
  end

  events
end
get_music_listings()
click to toggle source
# File lib/musicscrape.rb, line 34
# Loads the page, trims it to the recommended-events section and parses
# that section into event hashes.
#
# The pipeline runs on this instance, so the source_type given to the
# constructor is honoured. Previously a new Scrape.new(:web) was always
# built here, which ignored the receiver's source_type.
#
# Returns an array of hashes, one per event (see get_events).
def get_music_listings
  get_events(remove_excess(load_page))
end
load_page()
click to toggle source
# File lib/musicscrape.rb, line 41
# Returns the HTML to be parsed.
#
# When @source_type is :web the page at @url is fetched with RestClient;
# for any other source type the local stranger.txt is read through
# open_local, which holds the single copy of the file-reading logic.
def load_page
  return RestClient.get(@url) if @source_type == :web

  open_local
end
open_local()
click to toggle source
# File lib/musicscrape.rb, line 56
# Reads stranger.txt from the directory containing this source file and
# returns its contents as a String. Used as a local stand-in for the web
# page when testing.
#
# File.read is used instead of Kernel#open: it only ever treats its
# argument as a file path and closes the file itself.
def open_local
  File.read("#{File.dirname(__FILE__)}/stranger.txt")
end
remove_excess(text_in)
click to toggle source
# File lib/musicscrape.rb, line 50
# Removes everything except the recommended events section.
#
# text_in       - String containing the full page HTML.
# section_start - markup that opens the section; kept in the result.
# section_end   - markup at which the section is cut off; the first
#                 occurrence after section_start is kept in the result and
#                 everything following it is dropped.
#
# Both markers default to the values that were previously hard-coded.
#
# Returns the stripped section. The result is "" when section_start is not
# found, and runs to the end of the text when section_end is not found.
def remove_excess(text_in,
                  section_start = "<h2 class=\"sitesection\">Recommended Music Events</h2>",
                  section_end = "<li class=")
  _before, start_marker, after_start = text_in.partition(section_start)
  section, end_marker, _after = (start_marker + after_start).partition(section_end)
  (section + end_marker).strip
end