class BuntoImport::Importers::WordpressDotCom
Public Class Methods
download_images(title, post_hpricot, assets_folder)
click to toggle source
Modifies the post's DOM tree in place: the src of each img element is rewritten to point at its copy in the assets folder.
# File lib/bunto-import/importers/wordpressdotcom.rb, line 25
#
# Downloads the images referenced by a post into +assets_folder+ and points
# the post at the local copies. Will modify post DOM tree: the src of each
# <img> is rewritten to "{{ site.baseurl }}/<assets_folder>/<basename>".
#
# title         - String, used only in the progress output.
# post_hpricot  - Parsed post body; must answer `/ "img"` with a collection of
#                 elements that support [] and []= for the "src" attribute.
# assets_folder - Directory the image files are written to.
#
# Images whose target file already exists are not fetched again. Download
# failures are reported on stdout and do not stop the remaining images.
def self.download_images(title, post_hpricot, assets_folder)
  images = (post_hpricot/"img")
  return if images.empty?

  puts "Downloading images for " + title
  images.each do |i|
    uri = i["src"]
    # An <img> without a src has nothing to download; File.basename(nil)
    # would otherwise raise TypeError out of this method.
    next if uri.nil? || uri.empty?

    file_name = File.basename(uri)
    i["src"] = "{{ site.baseurl }}/%s/%s" % [assets_folder, file_name]
    dst = File.join(assets_folder, file_name)
    puts " " + uri
    if File.exist?(dst)
      puts " Already in cache. Clean assets folder if you want a redownload."
      next
    end

    begin
      # OpenURI.open_uri instead of Kernel#open: Kernel#open treats a string
      # starting with "|" as a command to run and no longer fetches URLs on
      # Ruby 3, while the src value comes straight from the post HTML.
      OpenURI.open_uri(uri, allow_redirections: :safe) do |f|
        # The body is read before the destination is created, so a failed read
        # cannot leave an empty file behind that later counts as "cached".
        # binwrite keeps the bytes exact; puts would append a newline.
        File.binwrite(dst, f.read)
      end
      puts " OK!"
    rescue => e
      puts " Error: #{e.message}"
      puts e.backtrace.join("\n")
    end
  end
end
process(options)
click to toggle source
# File lib/bunto-import/importers/wordpressdotcom.rb, line 124
#
# Imports a WordPress export file: every <item> of the channel is written to
# File.join(item.directory_name, item.file_name) as a YAML header, a '---'
# line and the post body passed through Util.wpautop.
#
# options - Hash read with string keys:
#           'source'          - export file to read (default "wordpress.xml")
#           'no_fetch_images' - when truthy, images are not downloaded
#           'assets_folder'   - image directory (default 'assets'); created
#                               up front
#
# An item that fails while being converted or written is reported on stdout
# and left out of the totals. A per-post-type summary is printed at the end.
def self.process(options)
  source        = options.fetch('source', "wordpress.xml")
  fetch         = !options.fetch('no_fetch_images', false)
  assets_folder = options.fetch('assets_folder', 'assets')
  FileUtils.mkdir_p(assets_folder)

  import_count = Hash.new(0)
  doc = Hpricot::XML(File.read(source))

  # Fetch authors data from header, keyed by login. Any failure while reading
  # the author entries leaves the table empty instead of aborting the import.
  authors =
    begin
      (doc/:channel/'wp:author').each_with_object({}) do |author, by_login|
        login = author.at("wp:author_login").inner_text.strip
        by_login[login] = {
          "login"        => login,
          "email"        => author.at("wp:author_email").inner_text,
          "display_name" => author.at("wp:author_display_name").inner_text,
          "first_name"   => author.at("wp:author_first_name").inner_text,
          "last_name"    => author.at("wp:author_last_name").inner_text
        }
      end
    rescue
      {}
    end

  (doc/:channel/:item).each do |node|
    item = Item.new(node)

    categories = node.search('category[@domain="category"]')
                     .map(&:inner_text)
                     .reject { |name| name == 'Uncategorized' }
                     .uniq
    tags = node.search('category[@domain="post_tag"]').map(&:inner_text).uniq

    metas = node.search("wp:postmeta").each_with_object({}) do |meta, collected|
      collected[meta.at('wp:meta_key').inner_text] = meta.at('wp:meta_value').inner_text
    end

    author_login = item.text_for('dc:creator').strip

    header = {
      'layout'     => item.post_type,
      'title'      => item.title,
      'date'       => item.published_at,
      'type'       => item.post_type,
      'published'  => item.published?,
      'status'     => item.status,
      'categories' => categories,
      'tags'       => tags,
      'meta'       => metas,
      'author'     => authors[author_login]
    }

    begin
      content = Hpricot(item.text_for('content:encoded'))
      if item.excerpt
        header['excerpt'] = item.excerpt
      end
      download_images(item.title, content, assets_folder) if fetch

      FileUtils.mkdir_p item.directory_name
      post_path = File.join(item.directory_name, item.file_name)
      File.open(post_path, "w") do |f|
        f.puts header.to_yaml
        f.puts '---'
        f.puts Util.wpautop(content.to_html)
      end
    rescue => e
      puts "Couldn't import post!"
      puts "Title: #{item.title}"
      puts "Name/Slug: #{item.file_name}\n"
      puts "Error: #{e.message}"
      next
    end

    # Only reached when the post was written without error.
    import_count[item.post_type] += 1
  end

  import_count.each { |post_type, total| puts "Imported #{total} #{post_type}s" }
end
require_deps()
click to toggle source
# File lib/bunto-import/importers/wordpressdotcom.rb, line 6
#
# Hands the list of libraries this importer needs to
# BuntoImport.require_with_fallback and returns whatever that call returns.
def self.require_deps
  dependencies = %w[rubygems fileutils safe_yaml hpricot time open-uri open_uri_redirections]
  BuntoImport.require_with_fallback(dependencies)
end
sluggify(title)
click to toggle source
# File lib/bunto-import/importers/wordpressdotcom.rb, line 202
#
# Builds a slug from a title: every run of non-alphanumeric characters becomes
# a single '-', then the result is lower-cased. Hyphens produced at the start
# or end of the string are kept.
def self.sluggify(title)
  hyphenated = title.gsub(%r{[^[:alnum:]]+}, '-')
  hyphenated.downcase
end
specify_options(c)
click to toggle source
# File lib/bunto-import/importers/wordpressdotcom.rb, line 18
#
# Registers this importer's command-line options on +c+ by calling c.option
# once per option, in the order listed. Returns the result of the last
# c.option call.
def self.specify_options(c)
  definitions = [
    ['source', '--source FILE', 'WordPress export XML file (default: "wordpress.xml")'],
    ['no_fetch_images', '--no-fetch-images', 'Do not fetch the images referenced in the posts'],
    ['assets_folder', '--assets_folder FOLDER', 'Folder where assets such as images will be downloaded to (default: assets)']
  ]
  definitions.map { |name, switch, description| c.option(name, switch, description) }.last
end