class MediaWikiTableScraper
Attributes
tables[R]
Public Class Methods
new(url)
click to toggle source
# File lib/mediawiki_table_scraper.rb, line 13 def initialize(url) doc = Nokorexi.new(url).to_doc tables = doc.root.css('.wikitable') # Fetch the records as an array of hash records for each table @tables = tables.map do |table| rows = table.xpath 'tr' # fetch the column names labels = rows.shift.xpath 'th/text()' names = labels.map {|x| x.downcase.to_sym } a = rows.map do |row| row.xpath('td').map do |x| if x.has_elements? then x.children.map do |c| c.is_a?(String) ? c : c.xml.gsub(/<\/?\w+[^>]*>/,'') end.join ' ' else x.text.to_s end end end a2 = a.map {|rows| names.zip(rows).to_h } end end
Public Instance Methods
to_a()
click to toggle source
# File lib/mediawiki_table_scraper.rb, line 51 def to_a() @tables end