module BuntoImport::Importers::Blogger::BloggerAtomStreamListenerMethods
Attributes
comments[RW]
leave_blogger_info[RW]
original_url_base[R]
Public Instance Methods
get_post_data_from_in_entry_elem_info()
click to toggle source
# File lib/bunto-import/importers/blogger.rb, line 229 def get_post_data_from_in_entry_elem_info if (@in_entry_elem.nil? || ! @in_entry_elem.has_key?(:meta) || ! @in_entry_elem[:meta].has_key?(:kind)) nil elsif @in_entry_elem[:meta][:kind] == 'post' timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime('%Y-%m-%d') if @in_entry_elem[:meta][:original_url] original_uri = URI.parse(@in_entry_elem[:meta][:original_url]) original_path = original_uri.path.to_s filename = "%s-%s" % [timestamp, File.basename(original_path, File.extname(original_path))] @original_url_base = "#{original_uri.scheme}://#{original_uri.host}" elsif @in_entry_elem[:meta][:draft] # Drafts don't have published urls name = @in_entry_elem[:meta][:title] if name.nil? filename = timestamp else filename = "%s-%s" % [timestamp, CGI.escape(name.downcase).tr('+','-')] end else raise 'Original URL is missing' end header = { 'layout' => 'post', 'title' => @in_entry_elem[:meta][:title], 'date' => @in_entry_elem[:meta][:published], 'author' => @in_entry_elem[:meta][:author], 'tags' => @in_entry_elem[:meta][:category], } header['modified_time'] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published] header['thumbnail'] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail] header['blogger_id'] = @in_entry_elem[:meta][:id] if @leave_blogger_info header['blogger_orig_url'] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url] body = @in_entry_elem[:body] # body escaping associated with liquid if body =~ /{{/ body.gsub!(/{{/, '{{ "{{" }}') end if body =~ /{%/ body.gsub!(/{%/, '{{ "{%" }}') end { :filename => filename, :header => header, :body => body } elsif @in_entry_elem[:meta][:kind] == 'comment' timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime('%Y-%m-%d') if @in_entry_elem[:meta][:original_url] if not @comment_seq @comment_seq = 1 end original_uri = URI.parse(@in_entry_elem[:meta][:original_url]) original_path = original_uri.path.to_s filename = "%s-%s-%s" % [timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq] @comment_seq = @comment_seq + 1 @original_url_base = "#{original_uri.scheme}://#{original_uri.host}" else raise 'Original URL is missing' end header = { 'date' => @in_entry_elem[:meta][:published], 'author' => @in_entry_elem[:meta][:author], 'blogger_post_id' => @in_entry_elem[:meta][:post_id], } header['modified_time'] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published] header['thumbnail'] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail] header['blogger_id'] = @in_entry_elem[:meta][:id] if @leave_blogger_info header['blogger_orig_url'] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url] body = @in_entry_elem[:body] # body escaping associated with liquid if body =~ /{{/ body.gsub!(/{{/, '{{ "{{" }}') end if body =~ /{%/ body.gsub!(/{%/, '{{ "{%" }}') end { :filename => filename, :header => header, :body => body } else nil end end
tag_end(tag)
click to toggle source
# File lib/bunto-import/importers/blogger.rb, line 181 def tag_end(tag) case tag when 'entry' raise 'nest entry element' unless @in_entry_elem if @in_entry_elem[:meta][:kind] == 'post' post_data = get_post_data_from_in_entry_elem_info if post_data target_dir = '_posts' target_dir = '_drafts' if @in_entry_elem[:meta][:draft] FileUtils.mkdir_p(target_dir) file_name = URI::decode("#{post_data[:filename]}.html") File.open(File.join(target_dir, file_name), 'w') do |f| f.flock(File::LOCK_EX) f << post_data[:header].to_yaml f << "---\n\n" f << post_data[:body] end end elsif @in_entry_elem[:meta][:kind] == 'comment' and @comments post_data = get_post_data_from_in_entry_elem_info if post_data target_dir = '_comments' FileUtils.mkdir_p(target_dir) file_name = URI::decode("#{post_data[:filename]}.html") File.open(File.join(target_dir, file_name), 'w') do |f| f.flock(File::LOCK_EX) f << post_data[:header].to_yaml f << "---\n\n" f << post_data[:body] end end end @in_entry_elem = nil end @tag_bread.pop end
tag_start(tag, attrs)
click to toggle source
# File lib/bunto-import/importers/blogger.rb, line 108 def tag_start(tag, attrs) @tag_bread = [] unless @tag_bread @tag_bread.push(tag) case tag when 'entry' raise 'nest entry element' if @in_entry_elem @in_entry_elem = {:meta => {}, :body => nil} when 'title' if @in_entry_elem raise 'only <title type="text"></title> is supported' if attrs['type'] != 'text' end when 'category' if @in_entry_elem if attrs['scheme'] == 'http://www.blogger.com/atom/ns#' @in_entry_elem[:meta][:category] = [] unless @in_entry_elem[:meta][:category] @in_entry_elem[:meta][:category] << attrs['term'] elsif attrs['scheme'] == 'http://schemas.google.com/g/2005#kind' kind = attrs['term'] kind.sub!(Regexp.new("^http://schemas\\.google\\.com/blogger/2008/kind\\#"), '') @in_entry_elem[:meta][:kind] = kind end end when 'content' if @in_entry_elem @in_entry_elem[:meta][:content_type] = attrs['type'] end when 'link' if @in_entry_elem if attrs['rel'] == 'alternate' && attrs['type'] == 'text/html' @in_entry_elem[:meta][:original_url] = attrs['href'] elsif attrs['rel'] == 'replies' && attrs['type'] == 'text/html' unless @in_entry_elem[:meta][:original_url] @in_entry_elem[:meta][:original_url] = attrs['href'].sub(/\#comment-form$/, '') end end end when 'media:thumbnail' if @in_entry_elem @in_entry_elem[:meta][:thumbnail] = attrs['url'] end when 'thr:in-reply-to' if @in_entry_elem @in_entry_elem[:meta][:post_id] = attrs['ref'] end end end
text(text)
click to toggle source
# File lib/bunto-import/importers/blogger.rb, line 156 def text(text) if @in_entry_elem case @tag_bread.last when 'id' @in_entry_elem[:meta][:id] = text when 'published' @in_entry_elem[:meta][:published] = text when 'updated' @in_entry_elem[:meta][:updated] = text when 'title' @in_entry_elem[:meta][:title] = text when 'content' @in_entry_elem[:body] = text when 'name' if @tag_bread[-2..-1] == %w[author name] @in_entry_elem[:meta][:author] = text end when 'app:draft' if @tag_bread[-2..-1] == %w[app:control app:draft] @in_entry_elem[:meta][:draft] = true if text == 'yes' end end end end