class Misawa

Constants

DOMAIN
VERSION

Public Class Methods

new(name, page = 0)
# File lib/Ruby4Misawa.rb, line 21
# Stores the requested category and page number.
#
# An Integer +name+ is used directly as the category id; any other value is
# translated to an id through name_to_cid.
#
# @param name [Integer, Object] category id, or a key understood by name_to_cid
# @param page [Integer] page number placed in the request URI (defaults to 0)
def initialize(name, page = 0)
  @name = name
  @cid =
    if @name.is_a?(Integer)
      @name
    else
      name_to_cid(@name)
    end
  @page = page
end

Public Instance Methods

scrape()
# File lib/Ruby4Misawa.rb, line 27
# Fetches the page at misawa_uri and extracts one hash per entry.
#
# Entry metadata is read from the RDF blocks the page carries inside HTML
# comments; image data is read from the <img class="pict"> elements and is
# merged into the entry found at the same position.
#
# @return [Array<Hash>] one hash per entry with :title, :date and :identifier
#   (from the rdf:Description attributes) plus :image (img src) and :body
#   (img alt)
# @raise [NotFoundError] when the request fails with OpenURI::HTTPError
def scrape
  begin
    # URI.open rather than Kernel#open: since Ruby 3.0 Kernel#open no longer
    # fetches URLs. The block form also closes the stream once it is read.
    html = URI.open(misawa_uri, &:read)
  rescue OpenURI::HTTPError
    raise NotFoundError
  end

  nokogiri = Nokogiri.HTML(html)

  # parse some attributes
  data = nokogiri.xpath('//comment()[contains(., "rdf")]').map do |entry|
    # Strip the comment delimiters so the RDF payload can be parsed as XML.
    rdf = Nokogiri.XML(entry.to_s.toutf8.gsub(/^<!--|-->$/, ""))
    attributes = rdf.child.css('rdf|Description')[0].attributes
    %w[title date identifier].each_with_object({}) do |key, result|
      result[key.to_sym] = attributes[key].value
    end
  end

  # parse images
  nokogiri.css('img.pict').each_with_index do |image, i|
    # An image without a matching RDF entry still gets a record of its own
    # instead of raising NoMethodError on nil.
    (data[i] ||= {}).merge!(:image => image['src'], :body => image['alt'])
  end

  data
end

Private Instance Methods

misawa_uri()
# File lib/Ruby4Misawa.rb, line 55
# Builds the request URI: DOMAIN followed by the category id and page number
# as the +cid+ and +page+ query parameters.
#
# @return [String]
def misawa_uri
  format('%s?cid=%s&page=%s', DOMAIN, @cid, @page)
end
name_to_cid(name)
# File lib/Ruby4Misawa.rb, line 59
# Looks up +name+ in the class-level @@categories table and returns the value
# stored there.
#
# NOTE(review): @@categories is defined outside this view; presumably a Hash
# of category name => id, in which case unknown names yield nil — confirm.
#
# @param name [Object] lookup key
# @return [Object] the value @@categories holds for +name+
def name_to_cid(name)
  categories = @@categories
  categories[name]
end