class AikatsuCalendar::Scraper

Attributes

day[RW]
month[RW]
schedules[RW]
year[RW]

Public Class Methods

new() click to toggle source
# File lib/aikatsu_calendar/scraper.rb, line 22
def initialize
  @schedules = []
end
scrape(path=AikatsuCalendar::URL) click to toggle source
# File lib/aikatsu_calendar/scraper.rb, line 13
def self.scrape(path=AikatsuCalendar::URL)
  scraper = new()
  doc = open(path) {|f| Nokogiri::HTML.parse(f) }
  scraper.feed(doc)
  scraper.schedules.uniq do |x|
    [x[:type], x[:content], x[:date_from], x[:date_until]]
  end
end

Public Instance Methods

class_to_type(s) click to toggle source
# File lib/aikatsu_calendar/scraper.rb, line 94
def class_to_type(s)
  if s =~ /schedule-(\w+)/
    $1
  else
    nil
  end
end
feed(doc) click to toggle source
# File lib/aikatsu_calendar/scraper.rb, line 26
def feed(doc)
  container = doc.at_css(".info-schedule")
  container.css('table').each do |table|
    feed_table(table)
  end
end
feed_item(p) click to toggle source
# File lib/aikatsu_calendar/scraper.rb, line 56
def feed_item(p)
  @schedules << parse_item(p)
end
feed_row(tr) click to toggle source
# File lib/aikatsu_calendar/scraper.rb, line 45
def feed_row(tr)
  # 日付
  text = tr.at_css('td').text
  m = text.match(/(\d+)日/) or raise ValueError, text
  @day = m[1].to_i

  tr.css('p').each do |p|
    feed_item(p)
  end
end
feed_table(table) click to toggle source
# File lib/aikatsu_calendar/scraper.rb, line 33
def feed_table(table)
  # 年と月
  text = table.at_css('th').text
  m = text.match(/(\d+)年(\d+)月/) or raise ValueError, text
  @year = m[1].to_i
  @month = m[2].to_i

  table.css('tr')[1..-1].each do |tr|
    feed_row(tr)
  end
end
parse_item(p) click to toggle source
# File lib/aikatsu_calendar/scraper.rb, line 60
def parse_item(p)
  # 日付
  text = p.text
  re = /(?: ※)?(\d+)年(\d+)月(\d+)日~(?:(\d+)年)?(\d+)月(\d+)日/
  m = text.match(re)
  if m
    year_until = (m[4] || m[1]).to_i
    date_from = Time.local(m[1].to_i, m[2].to_i, m[3].to_i)
    date_until = Time.local(year_until, m[5].to_i, m[6].to_i)
  else
    date_from = date_until = Time.local(@year, @month, @day)
  end
  # 日付をとっぱらう
  text = text.sub(re, '')

  # URL
  url = nil
  if (a = p.at_css('a[href]'))
    base = 'http://www.aikatsu.com/calender/'
    url = URI.join(base, a.attr(:href)).to_s
  end

  # type
  type = class_to_type(p.attr(:class))

  {
    type: type,
    date_from: date_from,
    date_until: date_until,
    content: text.strip,
    link: url,
  }
end
to_json(pretty=false) click to toggle source
# File lib/aikatsu_calendar/scraper.rb, line 102
def to_json(pretty=false)
  if pretty
    JSON.pretty_generate(@schedules)
  else
    JSON.dump(@schedules)
  end
end