class BuntoImport::Importers::WordpressDotCom

Public Class Methods

download_images(title, post_hpricot, assets_folder)

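Downloads each image referenced by the post into the assets folder and rewrites the `src` attribute of every `<img>` to point at the local copy. This modifies the post DOM tree in place.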

# File lib/bunto-import/importers/wordpressdotcom.rb, line 25
# Downloads every image referenced by a post into +assets_folder+ and
# rewrites each <img> +src+ to "{{ site.baseurl }}/<assets_folder>/<file name>".
#
# The DOM tree passed in is modified in place. Images whose target file
# already exists in +assets_folder+ are not fetched again. A failed download
# is reported on stdout and does not stop the remaining images.
#
# title         - String post title, used only for the progress message.
# post_hpricot  - parsed post content; must answer `/ "img"` with a collection
#                 of elements supporting `[]` and `[]=` for attributes.
# assets_folder - String path of the directory the images are saved into.
#
# Returns nil when the post contains no images, otherwise the image collection.
def self.download_images(title, post_hpricot, assets_folder)
  images = (post_hpricot/"img")
  return if images.empty?

  puts "Downloading images for " + title
  images.each do |img|
    uri = img["src"]
    # An <img> without a usable src has nothing to download, and
    # File.basename(nil) would raise outside the per-image rescue below.
    next if uri.nil? || uri.empty?

    file_name = File.basename(uri)
    img["src"] = "{{ site.baseurl }}/%s/%s" % [assets_folder, file_name]
    dst = File.join(assets_folder, file_name)
    puts "  " + uri
    if File.exist?(dst)
      puts "    Already in cache. Clean assets folder if you want a redownload."
      next
    end
    begin
      open(uri, allow_redirections: :safe) do |remote|
        File.open(dst, "wb") do |out|
          # write, not puts: puts appends "\n" when the payload does not end
          # with one, which corrupts binary image data.
          out.write remote.read
        end
      end
      puts "    OK!"
    rescue => e
      puts "    Error: #{e.message}"
      puts e.backtrace.join("\n")
    end
  end
end
process(options)
# File lib/bunto-import/importers/wordpressdotcom.rb, line 124
# Imports every <item> of a WordPress export file as a post file with a YAML
# header.
#
# options - Hash read with string keys:
#           'source'          - path of the export XML (default "wordpress.xml")
#           'no_fetch_images' - truthy to skip image downloads (default false)
#           'assets_folder'   - directory for downloaded images (default "assets")
#
# For each item a file is written to item.directory_name/item.file_name
# containing the header as YAML, a '---' line and the post content passed
# through Util.wpautop. An item that fails is reported on stdout and skipped.
# A per-post-type count of imported items is printed at the end.
def self.process(options)
  export_path   = options.fetch('source', 'wordpress.xml')
  fetch_images  = !options.fetch('no_fetch_images', false)
  assets_folder = options.fetch('assets_folder', 'assets')
  FileUtils.mkdir_p(assets_folder)

  import_count = Hash.new(0)
  doc = Hpricot::XML(File.read(export_path))

  # Author details from the channel header, keyed by login. Any error while
  # reading them results in an empty table instead of aborting the import.
  authors =
    begin
      (doc/:channel/'wp:author').each_with_object({}) do |author, by_login|
        login = author.at('wp:author_login').inner_text.strip
        by_login[login] = {
          'login'        => login,
          'email'        => author.at('wp:author_email').inner_text,
          'display_name' => author.at('wp:author_display_name').inner_text,
          'first_name'   => author.at('wp:author_first_name').inner_text,
          'last_name'    => author.at('wp:author_last_name').inner_text
        }
      end
    rescue StandardError
      {}
    end

  (doc/:channel/:item).each do |node|
    item = Item.new(node)

    categories = node.search('category[@domain="category"]')
                     .map(&:inner_text)
                     .reject { |name| name == 'Uncategorized' }
                     .uniq
    tags = node.search('category[@domain="post_tag"]').map(&:inner_text).uniq

    # Later duplicates of a meta key overwrite earlier ones.
    metas = node.search('wp:postmeta').each_with_object({}) do |meta, collected|
      meta_key   = meta.at('wp:meta_key').inner_text
      meta_value = meta.at('wp:meta_value').inner_text
      collected[meta_key] = meta_value
    end

    author_login = item.text_for('dc:creator').strip

    # Key order is kept as-is because it determines the order of the YAML output.
    header = {
      'layout'     => item.post_type,
      'title'      => item.title,
      'date'       => item.published_at,
      'type'       => item.post_type,
      'published'  => item.published?,
      'status'     => item.status,
      'categories' => categories,
      'tags'       => tags,
      'meta'       => metas,
      'author'     => authors[author_login]
    }

    begin
      content = Hpricot(item.text_for('content:encoded'))
      header['excerpt'] = item.excerpt if item.excerpt

      download_images(item.title, content, assets_folder) if fetch_images

      FileUtils.mkdir_p(item.directory_name)
      post_path = File.join(item.directory_name, item.file_name)
      File.open(post_path, 'w') do |post_file|
        post_file.puts header.to_yaml
        post_file.puts '---'
        post_file.puts Util.wpautop(content.to_html)
      end
    rescue StandardError => e
      puts "Couldn't import post!"
      puts "Title: #{item.title}"
      puts "Name/Slug: #{item.file_name}\n"
      puts "Error: #{e.message}"
      next # a failed item is not counted
    end

    import_count[item.post_type] += 1
  end

  import_count.each_pair do |post_type, count|
    puts "Imported #{count} #{post_type}s"
  end
end
require_deps()
# File lib/bunto-import/importers/wordpressdotcom.rb, line 6
# Hands the list of libraries this importer needs to
# BuntoImport.require_with_fallback and returns whatever that call returns.
def self.require_deps
  dependencies = [
    'rubygems',
    'fileutils',
    'safe_yaml',
    'hpricot',
    'time',
    'open-uri',
    'open_uri_redirections'
  ]
  BuntoImport.require_with_fallback(dependencies)
end
sluggify(title)
# File lib/bunto-import/importers/wordpressdotcom.rb, line 202
# Turns a title into a slug: every run of non-alphanumeric characters becomes
# a single '-', then the result is lower-cased. Leading and trailing runs are
# replaced too, so the slug may start or end with '-'.
#
# title - String to convert.
#
# Returns a new String.
def self.sluggify(title)
  # Replace before down-casing; the order is kept as in the original.
  hyphenated = title.gsub(/[^[:alnum:]]+/, '-')
  hyphenated.downcase
end
specify_options(c)
# File lib/bunto-import/importers/wordpressdotcom.rb, line 18
# Registers this importer's command-line options on +c+.
#
# c - object answering `option(name, switch, description)`.
#
# Returns the result of the last `option` call.
def self.specify_options(c)
  c.option(
    'source',
    '--source FILE',
    'WordPress export XML file (default: "wordpress.xml")'
  )
  c.option(
    'no_fetch_images',
    '--no-fetch-images',
    'Do not fetch the images referenced in the posts'
  )
  c.option(
    'assets_folder',
    '--assets_folder FOLDER',
    'Folder where assets such as images will be downloaded to (default: assets)'
  )
end