class Acme::Smileage::Blog::AmebloDownloader
Constants
- BLOG_ENTRY_AUTHOR_MAPPING
Public Instance Methods
get_entry_body(entry_link)
click to toggle source
# File lib/acme/smileage/blog/ameblo_downloader.rb, line 25
#
# Builds the body object for a single blog entry.
#
# The page behind +entry_link+ is handed to +with_nokogiri+ (defined
# elsewhere in this class; presumably it fetches and parses the page --
# confirm there), and the yielded document is scraped into a new
# Acme::Smileage::Blog::Entry::Body.
#
# entry_link:: link of the entry page; also forwarded to
#              +parse_comment_list+ to build per-comment links.
#
# Returns whatever +with_nokogiri+ returns for the block, whose value is
# the populated Body object.
def get_entry_body(entry_link)
  with_nokogiri(entry_link) do |doc|
    Acme::Smileage::Blog::Entry::Body.new do |body|
      # Article text and comment link.
      body.text            = parse_text(doc, ".articleText")
      body.comment_link    = parse_attr(doc, ".commentLink", :href)
      # Paging links to the neighbouring entries (nil when absent).
      body.next_entry_link = parse_attr(doc, ".pagingNext", :href)
      body.prev_entry_link = parse_attr(doc, ".pagingPrev", :href)
      # Embedded images and reader comments.
      body.image_links     = parse_image_list(doc)
      body.comments        = parse_comment_list(entry_link, doc)
    end
  end
end
get_entry_list(blog, blog_link, page=1)
click to toggle source
# File lib/acme/smileage/blog/ameblo_downloader.rb, line 11
#
# Builds the entry list for one page of a blog's "entrylist" index.
#
# blog::      blog object handed through to +parse_entry_list+.
# blog_link:: base link of the blog; when nil/false an empty list is
#             returned and no page is requested.
# page::      index page number, interpolated into "entrylist-<page>.html"
#             (defaults to 1).
#
# Returns an Acme::Smileage::Blog::Entry::List. With a blog link, its
# +link+, +entries+ and +next_page+ are filled from the document yielded
# by +with_nokogiri+ (defined elsewhere in this class).
def get_entry_list(blog, blog_link, page=1)
  # Guard clause: the empty list must actually be *returned*. Previously
  # its value was discarded and execution fell through to with_nokogiri
  # with a nil link.
  return Acme::Smileage::Blog::Entry::List.new unless blog_link

  with_nokogiri(blog_link, "entrylist-#{page}.html") do |doc, uri|
    Acme::Smileage::Blog::Entry::List.new {|e|
      e.link = uri.to_s
      e.entries = parse_entry_list(blog, doc)
      e.next_page = parse_next_page(doc)
    }
  end
end
Private Instance Methods
parse_attr(doc, css, attr) { |v| ... }
click to toggle source
# File lib/acme/smileage/blog/ameblo_downloader.rb, line 154
#
# Reads attribute +attr+ from the first node matching +css+ under +doc+.
#
# doc::  object responding to +css+ (a parsed document or node).
# css::  CSS selector string.
# attr:: attribute key used to index the first matched node.
#
# Returns nil when nothing matches (the block is NOT called in that case).
# Otherwise the attribute value is stripped in place and, when a block is
# given, yielded to it -- the block's result becomes the return value.
# The block may receive nil if the node lacks the attribute.
def parse_attr(doc, css, attr)
  matches = doc.css(css)
  return nil if !matches || matches.empty?

  value = matches[0][attr]
  value.strip! if value

  return value unless block_given?

  yield value
end
parse_comment_list(entry_link, doc)
click to toggle source
# File lib/acme/smileage/blog/ameblo_downloader.rb, line 72
#
# Converts every ".commentList li" element of +doc+ into an
# Acme::Smileage::Blog::Entry::Comment.
#
# entry_link:: link of the entry page; each comment link is this value
#              followed by "#" and the id of the first <a> in the item.
# doc::        object responding to +css+.
#
# Returns an array with one Comment per list item.
#
# NOTE(review): the first <a> of each item is indexed without a nil check,
# so an item with no anchor would presumably raise -- confirm the markup
# always contains one.
def parse_comment_list(entry_link, doc)
  doc.css(".commentList li").map do |item|
    Acme::Smileage::Blog::Entry::Comment.new do |comment|
      first_anchor        = item.css("a")[0]
      comment.link        = format("%s#%s", entry_link, first_anchor[:id])
      comment.title       = parse_text(item, ".commentHeader")
      comment.text        = parse_text(item, ".commentBody")
      comment.author      = parse_text(item, ".commentAuthor")
      comment.author_link = parse_attr(item, ".commentAuthor", :href)
      comment.datetime    = parse_text(item, ".commentTime > time")
    end
  end
end
parse_entry_list(blog, doc)
click to toggle source
# File lib/acme/smileage/blog/ameblo_downloader.rb, line 40
#
# Converts every ".contentsList li" element of +doc+ into an
# Acme::Smileage::Blog::Entry::Header belonging to +blog+.
#
# blog:: blog object passed to Header.new and captured by each header's
#        +get_body+ singleton method.
# doc::  object responding to +css+.
#
# Returns an array with one Header per list item. Each header gains a
# +get_body+ singleton method that calls +blog.get_entry_body+ with the
# header object itself.
def parse_entry_list(blog, doc)
  doc.css(".contentsList li").map do |item|
    Acme::Smileage::Blog::Entry::Header.new(blog) do |header|
      header.link          = parse_attr(item, ".contentTitle", :href)
      header.title         = parse_text(item, ".contentTitle")
      header.datetime      = parse_text(item, ".contentTime")
      header.comment_count = parse_number(item, ".contentComment")
      header.good_count    = parse_number(item, "a.skinWeakColor")
      # guess_author is defined elsewhere; it needs link and title, so it
      # must run after both have been assigned.
      header.author        = guess_author(header.link, header.title)
      # Lazy body loader bound to this particular header.
      header.define_singleton_method(:get_body) { blog.get_entry_body(header) }
    end
  end
end
parse_image_list(doc)
click to toggle source
# File lib/acme/smileage/blog/ameblo_downloader.rb, line 63
#
# Collects image URLs for the linked images inside the article text.
#
# Every ".articleText a img" node's +src+ is inspected. When it contains a
# "t<digits>_<digits>" segment, that segment is rewritten to "o<digits>"
# (second digit group only) and the rewritten URL is kept. Images whose
# +src+ has no such segment are dropped, as before.
#
# doc:: object responding to +css+, yielding nodes indexable by +:src+.
#
# Returns an array of rewritten URL strings (possibly empty).
def parse_image_list(doc)
  doc.css(".articleText a img").map {|img|
    src = img[:src]
    # An <img> without a src attribute used to raise NoMethodError on nil;
    # skip it instead.
    next nil unless src

    thumb = /t[0-9]+_([0-9]+)/.match(src)
    next nil unless thumb

    # Build a new string rather than mutating the attribute value in place
    # (the old sub! also failed on frozen strings).
    "#{thumb.pre_match}o#{thumb[1]}#{thumb.post_match}"
  }.compact
end
parse_next_page(doc)
click to toggle source
# File lib/acme/smileage/blog/ameblo_downloader.rb, line 57
#
# Extracts the number of the next index page from the paging link.
#
# The href of ".listPagePaging .pagingNext" is read through +parse_attr+
# and the digits following "entrylist-" are converted with +to_i+.
#
# doc:: object responding to +css+.
#
# Returns an Integer, or nil when there is no paging link or it has no
# href. An href that lacks "entrylist-<digits>" yields 0, because
# nil.to_i is 0.
def parse_next_page(doc)
  parse_attr(doc, ".listPagePaging .pagingNext", :href) do |href|
    href ? href[/entrylist-(\d+)/, 1].to_i : nil
  end
end
parse_number(doc, css)
click to toggle source
# File lib/acme/smileage/blog/ameblo_downloader.rb, line 144
#
# Reads the text selected by +css+ (via +parse_text+) and returns the
# first run of decimal digits in it as an Integer.
#
# doc:: object responding to +css+.
# css:: CSS selector string.
#
# Returns nil when +parse_text+ returns nil or the text has no digits.
def parse_number(doc, css)
  text = parse_text(doc, css)
  return nil unless text

  digits = text[/\d+/, 0]
  digits ? digits.to_i : nil
end
parse_text(doc, css)
click to toggle source
# File lib/acme/smileage/blog/ameblo_downloader.rb, line 127
#
# Returns the plain text of whatever +css+ selects under +doc+.
#
# The selection is serialised with +to_s+ and cleaned up by hand:
# <br> tags become newlines, Ameba thumbnail <img> tags are replaced by
# the URL of the "o<digits>.jpg" variant, all remaining tags are removed,
# carriage returns and U+00A0 characters are deleted, the result is
# stripped and HTML entities are unescaped.
#
# doc:: object responding to +css+.
# css:: CSS selector string.
#
# Returns a String, or nil when +doc.css+ returns nil/false.
def parse_text(doc, css)
  e = doc.css(css)
  return nil if not e

  # The built-in approach below sometimes fails to convert line breaks
  # properly, so the markup is processed by hand instead:
  #   e.css("br").each {|ee| ee.replace("\n") }
  #   e.text.strip
  v = e.to_s

  # Match every spelling of the tag (<br>, <br/>, <br />, <br clear="all">).
  # The previous pattern only knew <br> and <br/>, so the other forms fell
  # through to the generic tag stripper and their line breaks were lost.
  v = v.gsub(/<br\b[^>]*>/i, "\n")

  # Thumbnail images ("t<digits>_<digits>.jpg") are replaced by the URL of
  # the corresponding "o<digits>.jpg" image.
  v = v.gsub(%r{<img.*?src="(http://stat\.ameba\.jp/user_images/.*?/)t[0-9]+_([0-9]+\.jpg)".*?>}) {
    "%so%s" % [$1, $2]
  }

  v = v.gsub(/<.*?>/, "")   # drop every remaining tag
  v = v.gsub(/\r/, "")
  v = v.gsub(/\u00a0/, "")  # delete U+00A0 (no-break space) characters

  CGI.unescapeHTML(v.strip)
end