class Workarea::Blog::Import::Wordpress::ContentCleaner

Public Class Methods

new(content, wordpress_hostname) click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 6
# Builds a cleaner for one piece of imported WordPress content.
#
# @param content [String] HTML markup to be cleaned
# @param wordpress_hostname [String] hostname of the source WordPress site
#   (stored for later use; presumably consulted when deciding whether a link
#   is internal -- confirm against the helpers that read it)
def initialize(content, wordpress_hostname)
  @wordpress_hostname = wordpress_hostname
  @content = content
end

Public Instance Methods

clean() click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 11
# Runs the cleaning steps in order and returns the resulting content.
#
# Each step's return value is written back to @content before the next
# step runs, so later steps operate on the output of earlier ones.
#
# @return [Object] the content after asset paths were updated and internal
#   links were made relative
def clean
  %i[update_asset_paths make_internal_links_relative].each do |step|
    @content = send(step)
  end

  @content
end

Private Instance Methods

ensure_schema(url) click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 53
# Returns +url+ with an explicit http(s) scheme.
#
# * URLs that already start with "http://" or "https://" (in any letter
#   case) are returned unchanged.
# * Protocol-relative URLs ("//host/path") have their leading "//" replaced
#   by "https://" instead of being prefixed, which would otherwise produce
#   "https:////host/path".
# * Anything else is prefixed with "https://".
#
# @param url [String] the URL to normalize
# @return [String] the URL with a scheme
def ensure_schema(url)
  return url if url =~ %r{\Ahttps?://}i

  "https://#{url.sub(%r{\A//}, '')}"
end
find_asset(uri) click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 32
# Looks up the Content::Asset whose name is derived from the path of +uri+.
#
# The name is the path up to (not including) its last ".", split on "/" and
# re-joined with "-", e.g. "/uploads/2019/photo.png" => "-uploads-2019-photo".
#
# @param uri [#path] parsed URL of the image being replaced
# @return [Object] the matching asset, or Content::Asset.image_placeholder
#   when the lookup raises or yields nil
def find_asset(uri)
  name = uri.path.rpartition('.').first.split('/').join('-')

  asset =
    begin
      Content::Asset.find_by(name: name)
    rescue StandardError
      # Deliberate best-effort: any lookup failure falls back to the
      # placeholder rather than aborting the import.
      nil
    end

  # A nil result (lookup that reports "not found" without raising) must not
  # leak to callers, which call #url on the return value.
  asset || Content::Asset.image_placeholder
end
update_asset_paths() click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 19
# Points every internal <img> in @content at its imported asset.
#
# @content is parsed as an HTML fragment. For each <img> whose src (after
# ensure_schema) parses to a URI accepted by internal_link?, the src is
# replaced with the url of the asset returned by find_asset. Images with no
# src attribute, or with a src that URI.parse rejects, are left untouched
# instead of aborting the whole clean.
#
# @return [String] the fragment serialized back to HTML
def update_asset_paths
  doc = Nokogiri::HTML.fragment(@content)

  doc.search('img').each do |image|
    src = image['src']
    next if src.nil? # <img> without a src attribute: nothing to rewrite

    begin
      uri = URI.parse(ensure_schema(src))
    rescue URI::InvalidURIError
      next # malformed URL (e.g. contains spaces): keep the original src
    end
    next unless internal_link?(uri)

    image.set_attribute('src', find_asset(uri).url)
  end

  doc.to_html
end