class Workarea::Blog::Import::Wordpress::ContentCleaner
Public Class Methods
new(content, wordpress_hostname)
click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 6
#
# Stores the HTML to be cleaned together with the hostname that is
# compared against link/image hosts when deciding what to rewrite.
#
# @param content the HTML content of the imported post
# @param wordpress_hostname hostname of the source WordPress site
def initialize(content, wordpress_hostname)
  @content, @wordpress_hostname = content, wordpress_hostname
end
Public Instance Methods
clean()
click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 11
#
# Runs both clean-up passes over the stored content and returns the result.
# Each pass reads @content, so the outcome of the asset pass is stored
# before the link pass runs.
#
# @return the cleaned HTML (also kept in @content)
def clean
  @content = update_asset_paths
  # The value of this final assignment is the method's return value.
  @content = make_internal_links_relative
end
Private Instance Methods
ensure_schema(url)
click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 53
#
# Makes sure a URL carries an http(s) scheme so URI.parse can expose its host.
#
# * URLs already starting with http:// or https:// (in any letter case)
#   are returned unchanged.
# * Protocol-relative URLs ("//host/path") get "https:" prepended, so the
#   host stays in the authority position instead of becoming part of the path.
# * Anything else is prefixed with "https://".
#
# @param url [String] the raw href/src value
# @return [String] the URL with a scheme
def ensure_schema(url)
  return url if url =~ %r{\Ahttps?://}i
  return "https:#{url}" if url.start_with?('//')

  "https://#{url}"
end
find_asset(uri)
click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 32
#
# Looks up the Content::Asset for an image URI.
#
# The asset name is derived from the URI path: the extension (everything
# after the last ".") is dropped and the "/" separators are replaced with
# "-", e.g. "/uploads/2017/photo.jpg" becomes "-uploads-2017-photo".
#
# Falls back to Content::Asset.image_placeholder when the lookup either
# raises or returns nil, so callers always receive an asset object.
#
# @param uri [URI] parsed image URL
# @return the matching asset, or the image placeholder
def find_asset(uri)
  name = uri.path.rpartition('.').first.split('/').join('-')
  Content::Asset.find_by(name: name) || Content::Asset.image_placeholder
rescue StandardError
  # Best-effort lookup: any failure resolves to the placeholder.
  Content::Asset.image_placeholder
end
internal_link?(uri)
click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 49
#
# Tells whether a URI points at the WordPress host given to the constructor.
# Hostnames are compared case-insensitively, since "Blog.Example.com" and
# "blog.example.com" name the same host.
#
# @param uri [#hostname] parsed URL
# @return [Boolean] true when the URI's host is the WordPress host
def internal_link?(uri)
  host = uri.hostname
  # With a missing value on either side there is nothing to case-fold;
  # plain equality keeps the previous result for those inputs.
  return host == @wordpress_hostname if host.nil? || @wordpress_hostname.nil?

  host.casecmp(@wordpress_hostname.to_s) == 0
end
make_internal_links_relative()
click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 37
#
# Rewrites every <a> whose href points at the WordPress host into a
# host-relative link and returns the resulting HTML.
#
# * Anchors without an href are left alone.
# * Hrefs that URI.parse rejects are left alone instead of aborting the
#   whole clean-up.
# * The query string and fragment of an internal link are kept, and a link
#   to the bare host becomes "/".
#
# @return [String] the HTML with internal links made relative
def make_internal_links_relative
  doc = Nokogiri::HTML.fragment(@content)

  doc.search("a").each do |link|
    href = link['href']
    next if href.nil? # e.g. named anchors carry no href

    uri = begin
      URI.parse(ensure_schema(href))
    rescue URI::InvalidURIError
      nil
    end
    next if uri.nil? || !internal_link?(uri)

    relative = uri.path.to_s.empty? ? "/" : uri.path
    relative += "?#{uri.query}" if uri.query
    relative += "##{uri.fragment}" if uri.fragment
    link.set_attribute("href", relative)
  end

  doc.to_html
end
update_asset_paths()
click to toggle source
# File lib/workarea/blog/import/wordpress/content_cleaner.rb, line 19
#
# Replaces the src of every <img> served from the WordPress host with the
# url of the asset returned by find_asset, and returns the resulting HTML.
#
# * Images without a src attribute are left alone.
# * Srcs that URI.parse rejects are left alone instead of aborting the
#   whole clean-up.
# * Images hosted elsewhere are not touched.
#
# @return [String] the HTML with internal image sources replaced
def update_asset_paths
  doc = Nokogiri::HTML.fragment(@content)

  doc.search("img").each do |image|
    src = image['src']
    next if src.nil? # e.g. an <img> that only carries a data-src attribute

    uri = begin
      URI.parse(ensure_schema(src))
    rescue URI::InvalidURIError
      nil
    end
    next if uri.nil? || !internal_link?(uri)

    image.set_attribute("src", find_asset(uri).url)
  end

  doc.to_html
end