class NPRScraper
Attributes
country_page[RW]
home_page[R]
path_to_country_data[R]
path_to_reactor_data[R]
pris_home[R]
reactor_page[RW]
Public Class Methods
new()
click to toggle source
# File lib/npr_scraper.rb, line 9
# Prepares the URL fragments used to reach the IAEA PRIS pages.
#
# Only the site root, the PRIS landing page and the two detail-page path
# prefixes are set here. @country_page and @reactor_page stay unset; the
# default values that used to be assigned to them (country FI, reactor 157)
# are disabled.
def initialize
  # Path prefixes; a country ISO code or a reactor id is appended to these.
  @path_to_reactor_data = "/PRIS/CountryStatistics/ReactorDetails.aspx?current="
  @path_to_country_data = "/PRIS/CountryStatistics/CountryDetails.aspx?current="

  @home_page = "https://www.iaea.org"
  # Page from which the available countries and reactors, together with
  # their codes, are drawn.
  @pris_home = "#{@home_page}/PRIS/home.aspx"
end
Public Instance Methods
scrape_available_countries()
click to toggle source
# File lib/npr_scraper.rb, line 18
# Scrapes the PRIS home page and returns a hash describing every country
# offered in the country drop-down (#MainContent_ddlCountry).
#
# Each entry maps the option's first attribute value to the option's visible
# text; options whose text is empty are skipped.
# NOTE(review): the first attribute is presumably `value` (the ISO code),
# which gives { iso => country_name } -- confirm against the live markup.
#
# Assumes open-uri is already required by this file (needed for URI.open).
#
# @return [Hash{String => String}]
def scrape_available_countries
  # Kernel#open no longer opens URLs on Ruby 3.0+, so go through URI.open.
  # The block form closes the downloaded stream as soon as it is parsed.
  raw_text = URI.open(@pris_home) { |page| Nokogiri::HTML(page) }
  selection_list = raw_text.css(".box-content.shortCutBox").css("#MainContent_ddlCountry").css("option")

  selection_list.each_with_object({}) do |country, scraped_country_ids|
    next if country.text == ""

    scraped_country_ids[country.values[0]] = country.text
  end
end
scrape_available_reactors()
click to toggle source
# File lib/npr_scraper.rb, line 28
# Scrapes the PRIS home page and returns a hash describing every reactor
# offered in the reactor drop-down (#MainContent_ddlReactors).
#
# Each entry maps the option's first attribute value to the option's visible
# text; options whose text is empty are skipped.
# NOTE(review): the first attribute is presumably `value` (the reactor id),
# which gives { id => reactor_name } -- confirm against the live markup.
#
# Assumes open-uri is already required by this file (needed for URI.open).
#
# @return [Hash{String => String}]
def scrape_available_reactors
  # Kernel#open no longer opens URLs on Ruby 3.0+, so go through URI.open.
  # The block form closes the downloaded stream as soon as it is parsed.
  raw_text = URI.open(@pris_home) { |page| Nokogiri::HTML(page) }
  selection_list = raw_text.css(".box-content.shortCutBox").css("#MainContent_ddlReactors").css("option")

  selection_list.each_with_object({}) do |reactor, scraped_reactor_ids|
    next if reactor.text == ""

    scraped_reactor_ids[reactor.values[0]] = reactor.text
  end
end
scrape_country_data(country_iso)
click to toggle source
# File lib/npr_scraper.rb, line 38
# Scrapes the PRIS country page for +country_iso+ and returns a hash with the
# summary figures shown on the page plus the names of the listed reactors.
#
# Side effect: stores the URL that was fetched in @country_page.
# Assumes open-uri is already required by this file (needed for URI.open).
#
# The returned hash contains:
#   :iso              - the code that was passed in
#   one key per summary label, built from the first one or two words of the
#   lower-cased label joined with "_" (e.g. "Total Electricity ..." becomes
#   :total_electricity), mapped to the stripped text of the matching <h2>
#   :nuclear_e_share  - nuclear / total electricity as a "NN.NN%" string, or
#                       nil when either figure is missing from the page
#   :reactors         - text of every link in the .tablesorter table
#
# @param country_iso [String] code appended to the country details URL
# @return [Hash{Symbol => Object}]
def scrape_country_data(country_iso)
  @country_page = "#{@home_page}#{@path_to_country_data}#{country_iso}"
  # Kernel#open no longer opens URLs on Ruby 3.0+, so go through URI.open.
  # The block form closes the downloaded stream as soon as it is parsed.
  raw_text = URI.open(@country_page) { |page| Nokogiri::HTML(page) }

  summary_cells = raw_text.css(".box-content").css("td")
  summary_data_keys = summary_cells.css("label")
  summary_data = summary_cells.css("h2")
  reactor_table = raw_text.css(".tablesorter").css("td").css("a")

  country = { iso: country_iso }

  summary_data_keys.each_with_index do |key, i|
    # Non-bang strip/downcase: the bang variants return nil when the text is
    # already clean, which used to break the chain and blank out values.
    words = key.text.strip.downcase.match(/\b"?(\w+)\-?\s?(\w+)?\b/)
    value = summary_data[i]
    # Skip labels that yield no usable key or have no value beside them.
    next unless words && value

    # The second capture is nil for one-word labels; compact drops it.
    country[words.captures.compact.join("_").to_sym] = value.text.strip
  end

  # Share of energy produced with nuclear power versus total energy produced.
  nep, tep = %i[nuclear_electricity total_electricity].map do |figure|
    country[figure] && country[figure].gsub(/\sGW\.h/, "").to_f
  end
  country[:nuclear_e_share] = ("#{((nep / tep) * 100).round(2)}%" if nep && tep)

  country[:reactors] = reactor_table.map(&:text)
  country
end
scrape_reactor_data(reactor_id)
click to toggle source
# File lib/npr_scraper.rb, line 68
# Scrapes the PRIS reactor page for +reactor_id+ and returns a hash of the
# reactor's data.
#
# Side effect: stores the URL that was fetched in @reactor_page.
# Assumes open-uri is already required by this file (needed for URI.open).
#
# The returned hash contains:
#   :location - stripped text of #MainContent_litCaption inside .sidebar
#   :status   - text of #MainContent_MainContent_lblReactorStatus
#   one key per <span> under .box-content that carries an attribute value of
#   the form "MainContent_MainContent_lbl<Name>"; the key is :<Name> and the
#   value is the span's stripped text
#
# @param reactor_id [String, Integer] id appended to the reactor details URL
# @return [Hash{Symbol => String}]
def scrape_reactor_data(reactor_id)
  @reactor_page = "#{@home_page}#{@path_to_reactor_data}#{reactor_id}"
  # Kernel#open no longer opens URLs on Ruby 3.0+, so go through URI.open.
  # The block form closes the downloaded stream as soon as it is parsed.
  raw_text = URI.open(@reactor_page) { |page| Nokogiri::HTML(page) }

  reactor = {
    # strip, not strip!: the bang form returns nil for already-trimmed text.
    location: raw_text.css(".sidebar").css("#MainContent_litCaption").text.strip,
    status: raw_text.css("#MainContent_MainContent_lblReactorStatus").text
  }

  # Add the rest of the data, keyed by the name embedded in each span's id.
  raw_text.css(".box-content").css("span").each do |data|
    # The first attribute value that matches wins, so a span whose matching
    # attribute comes first behaves exactly as before; spans with no matching
    # attribute are skipped instead of raising on a nil match.
    label_id = data.values.find { |value| value.match?(/MainContent_MainContent_lbl(\w*)/) }
    next unless label_id

    reactor[label_id[/MainContent_MainContent_lbl(\w*)/, 1].to_sym] = data.text.strip
  end

  reactor
end