class Panner::Pans::Wordpress
Public Class Methods
eligible?(url)
click to toggle source
# File lib/panner/pans/wordpress.rb, line 2
# Reports whether +url+ is an http(s) URL whose host is a subdomain of
# wordpress.com.
#
# The pattern is anchored with \A rather than ^, because ^ matches at the
# start of any line and would accept a multi-line string that merely contains
# a wordpress.com URL on a later line. The host must also end right after
# "wordpress.com" (end of string, or one of / : ? #), so look-alike hosts such
# as "foo.wordpress.com.evil.example" are rejected.
#
# url - the candidate URL String; nil is reported as not eligible.
#
# Returns true or false.
def self.eligible?(url)
  pattern = %r{\Ahttps?://[^/?#]+\.wordpress\.com(?:[/:?#]|\z)}
  # Regexp#=~ yields nil for a nil argument, so nil input falls through to false.
  !(pattern =~ url).nil?
end
new(url)
click to toggle source
# File lib/panner/pans/wordpress.rb, line 6
# Sets up the instance state for +url+.
#
# url - stored in @next_url, which download later hands to the agent.
#
# Also creates a fresh Mechanize agent in @agent and starts with @page as nil
# (no page has been fetched yet).
def initialize(url)
  @page = nil
  @next_url = url
  @agent = Mechanize.new
end
Public Instance Methods
authenticate(options)
click to toggle source
# File lib/panner/pans/wordpress.rb, line 12
# Does nothing: +options+ is accepted but never read.
#
# Returns nil.
def authenticate(options)
  nil
end
download()
click to toggle source
# File lib/panner/pans/wordpress.rb, line 15
# Fetches the page at @next_url and parses each "article.post" element on it.
#
# The check for a missing @next_url runs before the request, so the agent is
# never asked to fetch nil and "got page content" is only printed after a
# page was actually retrieved.
#
# Returns the result of mapping parse_article over the matched articles, or
# nil (after printing "no more content") when @next_url is nil. In the nil
# case @page is left untouched.
def download
  if @next_url.nil?
    puts "no more content"
    return
  end

  @page = @agent.get(@next_url)
  puts "got page content"

  @page.search("article.post").map { |article| parse_article(article) }
end
next()
click to toggle source
# File lib/panner/pans/wordpress.rb, line 29
# Advances @next_url to the href of the first
# "div.nav-links div.nav-previous a" element found in @page, or to nil when
# the page has no such link, and prints the new value.
#
# Reads @page, which download assigns; calling this before any page has been
# fetched raises NoMethodError on nil.
#
# Returns nil (the result of puts).
def next
  link = @page.search("div.nav-links div.nav-previous a").first
  @next_url = link ? link['href'] : nil
  # The value must be interpolated; a bare "@next_url" inside the string
  # prints the variable's name rather than its contents.
  puts "next_url: #{@next_url}"
end
parse_article(article)
click to toggle source
# File lib/panner/pans/wordpress.rb, line 35
# Builds a Hash describing one article node.
#
# article - a node responding to at_css (download passes the elements matched
#           by "article.post").
#
# A node that lacks ".entry-title" or ".entry-content" yields nil for that
# field instead of raising NoMethodError, so one incomplete article does not
# abort parsing of the whole page.
#
# Returns a Hash with:
#   :title - text of the ".entry-title" element, or nil when absent
#   :body  - Deba.extract applied to the inner HTML of ".entry-content",
#            or nil when absent
def parse_article(article)
  title_node = article.at_css(".entry-title")
  content_node = article.at_css(".entry-content")

  {
    title: title_node && title_node.text,
    body: content_node && Deba.extract(content_node.inner_html)
  }
end